/*
 * Copyright © 2015-2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *   Robert Bragg <robert@sixbynine.org>
 */


/**
 * DOC: i915 Perf Overview
 *
 * Gen graphics supports a large number of performance counters that can help
 * driver and application developers understand and optimize their use of the
 * GPU.
 *
 * This i915 perf interface enables userspace to configure and open a file
 * descriptor representing a stream of GPU metrics which can then be read() as
 * a stream of sample records.
 *
 * The interface is particularly suited to exposing buffered metrics that are
 * captured by DMA from the GPU, unsynchronized with and unrelated to the CPU.
 *
 * Streams representing a single context are accessible to applications with a
 * corresponding drm file descriptor, such that OpenGL can use the interface
 * without special privileges. Access to system-wide metrics requires root
 * privileges by default, unless changed via the dev.i915.perf_stream_paranoid
 * sysctl option.
 *
 */

/**
 * DOC: i915 Perf History and Comparison with Core Perf
 *
 * The interface was initially inspired by the core Perf infrastructure but
 * some notable differences are:
 *
 * i915 perf file descriptors represent a "stream" instead of an "event"; where
 * a perf event primarily corresponds to a single 64bit value, while a stream
 * might sample sets of tightly-coupled counters, depending on the
 * configuration. For example the Gen OA unit isn't designed to support
 * orthogonal configurations of individual counters; it's configured for a set
 * of related counters. Samples for an i915 perf stream capturing OA metrics
 * will include a set of counter values packed in a compact HW specific format.
 * The OA unit supports a number of different packing formats which can be
 * selected by the user opening the stream. Perf has support for grouping
 * events, but each event in the group is configured, validated and
 * authenticated individually with separate system calls.
 *
 * i915 perf stream configurations are provided as an array of u64 (key,value)
 * pairs, instead of a fixed struct with multiple miscellaneous config members,
 * interleaved with event-type specific members.
 *
 * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
 * The supported metrics are being written to memory by the GPU unsynchronized
 * with the CPU, using HW specific packing formats for counter sets. Sometimes
 * the constraints on HW configuration require reports to be filtered before it
 * would be acceptable to expose them to unprivileged applications - to hide
 * the metrics of other processes/contexts. For these use cases a read() based
 * interface is a good fit, and provides an opportunity to filter data as it
 * gets copied from the GPU mapped buffers to userspace buffers.
 *
 *
 * Issues hit with first prototype based on Core Perf
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * The first prototype of this driver was based on the core perf
 * infrastructure, and while we did make that mostly work, with some changes to
 * perf, we found we were breaking or working around too many assumptions baked
 * into perf's currently cpu centric design.
 *
 * In the end we didn't see a clear benefit to making perf's implementation and
 * interface more complex by changing design assumptions while we knew we still
 * wouldn't be able to use any existing perf based userspace tools.
 *
 * Also considering the Gen specific nature of the Observability hardware and
 * how userspace will sometimes need to combine i915 perf OA metrics with
 * side-band OA data captured via MI_REPORT_PERF_COUNT commands; we're
 * expecting the interface to be used by a platform specific userspace such as
 * OpenGL or tools. This is to say; we aren't inherently missing out on having
 * a standard vendor/architecture agnostic interface by not using perf.
 *
 *
 * For posterity, in case we might re-visit trying to adapt core perf to be
 * better suited to exposing i915 metrics these were the main pain points we
 * hit:
 *
 * - The perf based OA PMU driver broke some significant design assumptions:
 *
 *   Existing perf pmus are used for profiling work on a cpu and we were
 *   introducing the idea of _IS_DEVICE pmus with different security
 *   implications, the need to fake cpu-related data (such as user/kernel
 *   registers) to fit with perf's current design, and adding _DEVICE records
 *   as a way to forward device-specific status records.
 *
 *   The OA unit writes reports of counters into a circular buffer, without
 *   involvement from the CPU, making our PMU driver the first of a kind.
 *
 *   Given the way we were periodically forwarding data from the GPU-mapped OA
 *   buffer to perf's buffer, those bursts of sample writes looked to perf like
 *   we were sampling too fast and so we had to subvert its throttling checks.
 *
 *   Perf supports groups of counters and allows those to be read via
 *   transactions internally but transactions currently seem designed to be
 *   explicitly initiated from the cpu (say in response to a userspace read())
 *   and while we could pull a report out of the OA buffer we can't
 *   trigger a report from the cpu on demand.
 *
 *   Related to being report based; the OA counters are configured in HW as a
 *   set while perf generally expects counter configurations to be orthogonal.
 *   Although counters can be associated with a group leader as they are
 *   opened, there's no clear precedent for being able to provide group-wide
 *   configuration attributes (for example we want to let userspace choose the
 *   OA unit report format used to capture all counters in a set, or specify a
 *   GPU context to filter metrics on). We avoided using perf's grouping
 *   feature and forwarded OA reports to userspace via perf's 'raw' sample
 *   field. This suited our userspace well considering how coupled the counters
 *   are when dealing with normalizing. It would be inconvenient to split
 *   counters up into separate events, only to require userspace to recombine
 *   them. For Mesa it's also convenient to be forwarded raw, periodic reports
 *   for combining with the side-band raw reports it captures using
 *   MI_REPORT_PERF_COUNT commands.
 *
 * - As a side note on perf's grouping feature; there was also some concern
 *   that using PERF_FORMAT_GROUP as a way to pack together counter values
 *   would quite drastically inflate our sample sizes, which would likely
 *   lower the effective sampling resolutions we could use when the available
 *   memory bandwidth is limited.
 *
 *   With the OA unit's report formats, counters are packed together as 32
 *   or 40bit values, with the largest report size being 256 bytes.
 *
 *   PERF_FORMAT_GROUP values are 64bit, but there doesn't appear to be a
 *   documented ordering to the values, implying PERF_FORMAT_ID must also be
 *   used to add a 64bit ID before each value; giving 16 bytes per counter.
 *
 *   Related to counter orthogonality; we can't time share the OA unit, while
 *   event scheduling is a central design idea within perf for allowing
 *   userspace to open + enable more events than can be configured in HW at any
 *   one time. The OA unit is not designed to allow re-configuration while in
 *   use. We can't reconfigure the OA unit without losing internal OA unit
 *   state which we can't access explicitly to save and restore. Reconfiguring
 *   the OA unit is also relatively slow, involving ~100 register writes. From
 *   userspace Mesa also depends on a stable OA configuration when emitting
 *   MI_REPORT_PERF_COUNT commands and importantly the OA unit can't be
 *   disabled while there are outstanding MI_RPC commands lest we hang the
 *   command streamer.
 *
 *   The contents of sample records aren't extensible by device drivers (i.e.
 *   the sample_type bits). As an example; Sourab Gupta had been looking to
 *   attach GPU timestamps to our OA samples. We were shoehorning OA reports
 *   into sample records by using the 'raw' field, but it's tricky to pack more
 *   than one thing into this field because events/core.c currently only lets a
 *   pmu give a single raw data pointer plus len which will be copied into the
 *   ring buffer. To include more than the OA report we'd have to copy the
 *   report into an intermediate larger buffer. I'd been considering allowing a
 *   vector of data+len values to be specified for copying the raw data, but
 *   it felt like a kludge to be using the raw field for this purpose.
 *
 * - It felt like our perf based PMU was making some technical compromises
 *   just for the sake of using perf:
 *
 *   perf_event_open() requires events to either relate to a pid or a specific
 *   cpu core, while our device pmu related to neither.
 *   Events opened with a pid will be automatically enabled/disabled according
 *   to the scheduling of that process - so not appropriate for us. When an
 *   event is related to a cpu id, perf ensures pmu methods will be invoked
 *   via an inter process interrupt on that core. To avoid invasive changes
 *   our userspace opened OA perf events for a specific cpu. This was workable
 *   but it meant the majority of the OA driver ran in atomic context,
 *   including all OA report forwarding, which wasn't really necessary in our
 *   case and made our locking requirements somewhat complex as we handled the
 *   interaction with the rest of the i915 driver.
 */

#include <linux/anon_inodes.h>
#include <linux/sizes.h>
#include <linux/uuid.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"
#include "gt/intel_lrc_reg.h"

#include "i915_drv.h"
#include "i915_oa_hsw.h"
#include "i915_oa_bdw.h"
#include "i915_oa_chv.h"
#include "i915_oa_sklgt2.h"
#include "i915_oa_sklgt3.h"
#include "i915_oa_sklgt4.h"
#include "i915_oa_bxt.h"
#include "i915_oa_kblgt2.h"
#include "i915_oa_kblgt3.h"
#include "i915_oa_glk.h"
#include "i915_oa_cflgt2.h"
#include "i915_oa_cflgt3.h"
#include "i915_oa_cnl.h"
#include "i915_oa_icl.h"

/* HW requires this to be a power of two, between 128k and 16M, though driver
 * is currently generally designed assuming the largest 16M size is used such
 * that the overflow cases are unlikely in normal operation.
 */
#define OA_BUFFER_SIZE		SZ_16M

#define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))

/**
 * DOC: OA Tail Pointer Race
 *
 * There's a HW race condition between OA unit tail pointer register updates and
 * writes to memory whereby the tail pointer can sometimes get ahead of what's
 * been written out to the OA buffer so far (in terms of what's visible to the
 * CPU).
 *
 * Although this can be observed explicitly while copying reports to userspace
 * by checking for a zeroed report-id field in tail reports, we want to account
 * for this earlier, as part of the oa_buffer_check to avoid lots of redundant
 * read() attempts.
 *
 * In effect we define a tail pointer for reading that lags the real tail
 * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
 * time for the corresponding reports to become visible to the CPU.
 *
 * To manage this we actually track two tail pointers:
 *  1) An 'aging' tail with an associated timestamp that is tracked until we
 *     can trust the corresponding data is visible to the CPU; at which point
 *     it is considered 'aged'.
 *  2) An 'aged' tail that can be used for read()ing.
 *
 * The two separate pointers let us decouple read()s from tail pointer aging.
 *
 * The tail pointers are checked and updated at a limited rate within a hrtimer
 * callback (the same callback that is used for delivering EPOLLIN events)
 *
 * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
 * indicates that an updated tail pointer is needed.
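 *
 * As a rough illustration of the scheme (the numbers below are the current
 * driver defaults, not HW requirements): the hrtimer callback runs every
 * %POLL_PERIOD (5ms); when it observes a new hardware tail it records it as
 * the 'aging' tail together with a timestamp, and only a later check that
 * runs at least %OA_TAIL_MARGIN_NSEC (100us) after that timestamp promotes
 * it to the 'aged' tail that read()s are allowed to consume up to.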
 *
 * Most of the implementation details for this workaround are in
 * oa_buffer_check_unlocked() and _append_oa_reports()
 *
 * Note for posterity: previously the driver used to define an effective tail
 * pointer that lagged the real pointer by a 'tail margin' measured in bytes
 * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
 * This was flawed considering that the OA unit may also automatically generate
 * non-periodic reports (such as on context switch) or the OA unit may be
 * enabled without any periodic sampling.
 */
#define OA_TAIL_MARGIN_NSEC	100000ULL
#define INVALID_TAIL_PTR	0xffffffff

/* frequency for checking whether the OA unit has written new reports to the
 * circular OA buffer...
 */
#define POLL_FREQUENCY 200
#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)

/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
static int zero;
static int one = 1;
static u32 i915_perf_stream_paranoid = true;

/* The maximum exponent the hardware accepts is 63 (essentially it selects one
 * of the 64bit timestamp bits to trigger reports from) but there's currently
 * no known use case for sampling as infrequently as once per 47 thousand years.
 *
 * Since the timestamps included in OA reports are only 32bits it seems
 * reasonable to limit the OA exponent where it's still possible to account for
 * overflow in OA report timestamps.
 */
#define OA_EXPONENT_MAX 31

#define INVALID_CTX_ID 0xffffffff

/* On Gen8+ automatically triggered OA reports include a 'reason' field... */
#define OAREPORT_REASON_MASK           0x3f
#define OAREPORT_REASON_SHIFT          19
#define OAREPORT_REASON_TIMER          (1<<0)
#define OAREPORT_REASON_CTX_SWITCH     (1<<3)
#define OAREPORT_REASON_CLK_RATIO      (1<<5)


/* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
 *
 * The highest sampling frequency we can theoretically program the OA unit
 * with is always half the timestamp frequency: E.g. 6.25MHz for Haswell.
 *
 * Initialized just before we register the sysctl parameter.
 */
static int oa_sample_rate_hard_limit;

/* Theoretically we can program the OA unit to sample every 160ns but don't
 * allow that by default unless root...
 *
 * The default threshold of 100000Hz is based on perf's similar
 * kernel.perf_event_max_sample_rate sysctl parameter.
 */
static u32 i915_oa_max_sample_rate = 100000;

/* XXX: beware if future OA HW adds new report formats that the current
 * code assumes all reports have a power-of-two size and ~(size - 1) can
 * be used as a mask to align the OA tail pointer.
 */
static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A13]	    = { 0, 64 },
	[I915_OA_FORMAT_A29]	    = { 1, 128 },
	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
};

static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A12]		    = { 0, 64 },
	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
};

#define SAMPLE_OA_REPORT      (1<<0)

/**
 * struct perf_open_properties - for validated properties given to open a stream
 * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
 * @single_context: Whether a single or all gpu contexts should be monitored
 * @ctx_handle: A gem ctx handle for use with @single_context
 * @metrics_set: An ID for an OA unit metric set advertised via sysfs
 * @oa_format: An OA unit HW report format
 * @oa_periodic: Whether to enable periodic OA unit sampling
 * @oa_period_exponent: The OA unit sampling period is derived from this
 *
 * As read_properties_unlocked() enumerates and validates the properties given
 * to open a stream of metrics the configuration is built up in the structure
 * which starts out zero initialized.
 */
struct perf_open_properties {
	u32 sample_flags;

	u64 single_context:1;
	u64 ctx_handle;

	/* OA sampling state */
	int metrics_set;
	int oa_format;
	bool oa_periodic;
	int oa_period_exponent;
};

static void free_oa_config(struct drm_i915_private *dev_priv,
			   struct i915_oa_config *oa_config)
{
	if (!PTR_ERR(oa_config->flex_regs))
		kfree(oa_config->flex_regs);
	if (!PTR_ERR(oa_config->b_counter_regs))
		kfree(oa_config->b_counter_regs);
	if (!PTR_ERR(oa_config->mux_regs))
		kfree(oa_config->mux_regs);
	kfree(oa_config);
}

static void put_oa_config(struct drm_i915_private *dev_priv,
			  struct i915_oa_config *oa_config)
{
	if (!atomic_dec_and_test(&oa_config->ref_count))
		return;

	free_oa_config(dev_priv, oa_config);
}

static int get_oa_config(struct drm_i915_private *dev_priv,
			 int metrics_set,
			 struct i915_oa_config **out_config)
{
	int ret;

	if (metrics_set == 1) {
		*out_config = &dev_priv->perf.oa.test_config;
		atomic_inc(&dev_priv->perf.oa.test_config.ref_count);
		return 0;
	}

	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (ret)
		return ret;

	*out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
	if (!*out_config)
		ret = -EINVAL;
	else
		atomic_inc(&(*out_config)->ref_count);

	mutex_unlock(&dev_priv->perf.metrics_lock);

	return ret;
}

static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
{
	return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
}

static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
{
	u32 oastatus1 = I915_READ(GEN7_OASTATUS1);

	return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
}

/**
 * oa_buffer_check_unlocked - check for data and update tail ptr state
 * @dev_priv: i915 device instance
 *
 * This is either called via
 * fops (for blocking reads in user ctx) or the poll check hrtimer (atomic
 * ctx) to check the OA buffer tail pointer and check if there is data
 * available for userspace to read.
 *
 * This function is central to providing a workaround for the OA unit tail
 * pointer having a race with respect to what data is visible to the CPU.
 * It is responsible for reading tail pointers from the hardware and giving
 * the pointers time to 'age' before they are made available for reading.
 * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
 *
 * Besides returning true when there is data available to read() this function
 * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
 * and .aged_tail_idx state used for reading.
 *
 * Note: It's safe to read OA config state here unlocked, assuming that this is
 * only called while the stream is enabled, while the global OA configuration
 * can't be modified.
 *
 * Returns: %true if the OA buffer contains data, else %false
 */
static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
{
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	unsigned long flags;
	unsigned int aged_idx;
	u32 head, hw_tail, aged_tail, aging_tail;
	u64 now;

	/* We have to consider the (unlikely) possibility that read() errors
	 * could result in an OA buffer reset which might reset the head,
	 * tails[] and aged_tail state.
	 */
	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/* NB: The head we observe here might effectively be a little out of
	 * date (between head and tails[aged_idx].offset if there is currently
	 * a read() in progress.
	 */
	head = dev_priv->perf.oa.oa_buffer.head;

	aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
	aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;

	hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);

	/* The tail pointer increases in 64 byte increments,
	 * not in report_size steps...
	 */
	hw_tail &= ~(report_size - 1);

	now = ktime_get_mono_fast_ns();

	/* Update the aged tail
	 *
	 * Flip the tail pointer available for read()s once the aging tail is
	 * old enough to trust that the corresponding data will be visible to
	 * the CPU...
	 *
	 * Do this before updating the aging pointer in case we may be able to
	 * immediately start aging a new pointer too (if new data has become
	 * available) without needing to wait for a later hrtimer callback.
	 */
	if (aging_tail != INVALID_TAIL_PTR &&
	    ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
	     OA_TAIL_MARGIN_NSEC)) {

		aged_idx ^= 1;
		dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;

		aged_tail = aging_tail;

		/* Mark that we need a new pointer to start aging... */
		dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
		aging_tail = INVALID_TAIL_PTR;
	}

	/* Update the aging tail
	 *
	 * We throttle aging tail updates until we have a new tail that
	 * represents >= one report more data than is already available for
	 * reading. This ensures there will be enough data for a successful
	 * read once this new pointer has aged and ensures we will give the new
	 * pointer time to age.
	 */
	if (aging_tail == INVALID_TAIL_PTR &&
	    (aged_tail == INVALID_TAIL_PTR ||
	     OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
		struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
		u32 gtt_offset = i915_ggtt_offset(vma);

		/* Be paranoid and do a bounds check on the pointer read back
		 * from hardware, just in case some spurious hardware condition
		 * could put the tail out of bounds...
		 */
		if (hw_tail >= gtt_offset &&
		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
			dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
				aging_tail = hw_tail;
			dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
		} else {
			DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
				  hw_tail);
		}
	}

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	return aged_tail == INVALID_TAIL_PTR ?
		false : OA_TAKEN(aged_tail, head) >= report_size;
}

/**
 * append_oa_status - Appends a status record to a userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @type: The kind of status to report to userspace
 *
 * Writes a status record (such as `DRM_I915_PERF_RECORD_OA_REPORT_LOST`)
 * into the userspace read() buffer.
 *
 * The @buf @offset will only be updated on success.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int append_oa_status(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    enum drm_i915_perf_record_type type)
{
	struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };

	if ((count - *offset) < header.size)
		return -ENOSPC;

	if (copy_to_user(buf + *offset, &header, sizeof(header)))
		return -EFAULT;

	(*offset) += header.size;

	return 0;
}

/**
 * append_oa_sample - Copies single OA report into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @report: A single OA report to (optionally) include as part of the sample
 *
 * The contents of a sample are configured through `DRM_I915_PERF_PROP_SAMPLE_*`
 * properties when opening a stream, tracked as `stream->sample_flags`. This
 * function copies the requested components of a single sample to the given
 * read() @buf.
 *
 * The @buf @offset will only be updated on success.
 *
 * Returns: 0 on success, negative error code on failure.
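 *
 * For reference, the record written to @buf is a struct
 * drm_i915_perf_record_header with type %DRM_I915_PERF_RECORD_SAMPLE,
 * followed immediately by the raw OA report when %SAMPLE_OA_REPORT is
 * requested; header.size covers the header plus all requested sample data.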
 */
static int append_oa_sample(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    const u8 *report)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	struct drm_i915_perf_record_header header;
	u32 sample_flags = stream->sample_flags;

	header.type = DRM_I915_PERF_RECORD_SAMPLE;
	header.pad = 0;
	header.size = stream->sample_size;

	if ((count - *offset) < header.size)
		return -ENOSPC;

	buf += *offset;
	if (copy_to_user(buf, &header, sizeof(header)))
		return -EFAULT;
	buf += sizeof(header);

	if (sample_flags & SAMPLE_OA_REPORT) {
		if (copy_to_user(buf, report, report_size))
			return -EFAULT;
	}

	(*offset) += header.size;

	return 0;
}

/**
 * gen8_append_oa_reports - Copies all buffered OA reports into
 *			    userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Notably any error condition resulting in a short read (-%ENOSPC or
 * -%EFAULT) will be returned even though one or more records may
 * have been successfully copied. In this case it's up to the caller
 * to decide if the error should be squashed before returning to
 * userspace.
 *
 * Note: reports are consumed from the head, and appended to the
 * tail, so the tail chases the head?... If you think that's mad
 * and back-to-front you're not alone, but this follows the
 * Gen PRM naming convention.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen8_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	unsigned int aged_tail_idx;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (WARN_ON(!stream->enabled))
		return -EIO;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	head = dev_priv->perf.oa.oa_buffer.head;
	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * An invalid tail pointer here means we're still waiting for the poll
	 * hrtimer callback to give us a pointer
	 */
	if (tail == INVALID_TAIL_PTR)
		return -EAGAIN;

	/*
	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
	 * while indexing relative to oa_buf_base.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/*
	 * An out of bounds or misaligned head or tail pointer implies a driver
	 * bug since we validate + align the tail pointers we read from the
	 * hardware and we are in full control of the head pointer which should
	 * only be incremented by multiples of the report size (notably also
	 * all a power of two).
	 */
	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
		      tail > OA_BUFFER_SIZE || tail % report_size,
		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
		      head, tail))
		return -EIO;


	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;
		u32 ctx_id;
		u32 reason;

		/*
		 * All the report sizes factor neatly into the buffer
		 * size so we never expect to see a report split
		 * between the beginning and end of the buffer.
		 *
		 * Given the initial alignment check a misalignment
		 * here would imply a driver bug that would result
		 * in an overrun.
		 */
		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/*
		 * The reason field includes flags identifying what
		 * triggered this specific report (mostly timer
		 * triggered or e.g. due to a context switch).
		 *
		 * This field is never expected to be zero so we can
		 * check that the report isn't invalid before copying
		 * it to userspace...
		 */
		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
			  OAREPORT_REASON_MASK);
		if (reason == 0) {
			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask;

		/*
		 * Squash whatever is in the CTX_ID field if it's marked as
		 * invalid to be sure we avoid false-positive, single-context
		 * filtering below...
		 *
		 * Note: that we don't clear the valid_ctx_bit so userspace can
		 * understand that the ID has been squashed by the kernel.
		 */
		if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit))
			ctx_id = report32[2] = INVALID_CTX_ID;

		/*
		 * NB: For Gen 8 the OA unit no longer supports clock gating
		 * off for a specific context and the kernel can't securely
		 * stop the counters from updating as system-wide / global
		 * values.
		 *
		 * Automatic reports now include a context ID so reports can be
		 * filtered on the cpu but it's not worth trying to
		 * automatically subtract/hide counter progress for other
		 * contexts while filtering since we can't stop userspace
		 * issuing MI_REPORT_PERF_COUNT commands which would still
		 * provide a side-band view of the real values.
		 *
		 * To allow userspace (such as Mesa/GL_INTEL_performance_query)
		 * to normalize counters for a single filtered context then it
		 * needs to be forwarded bookend context-switch reports so that
		 * it can track switches in between MI_REPORT_PERF_COUNT
		 * commands and can itself subtract/ignore the progress of
		 * counters associated with other contexts. Note that the
		 * hardware automatically triggers reports when switching to a
		 * new context which are tagged with the ID of the newly active
		 * context. To avoid the complexity (and likely fragility) of
		 * reading ahead while parsing reports to try and minimize
		 * forwarding redundant context switch reports (i.e. between
		 * other, unrelated contexts) we simply elect to forward them
		 * all.
		 *
		 * We don't rely solely on the reason field to identify context
		 * switches since it's not uncommon for periodic samples to
		 * identify a switch before any 'context switch' report.
		 */
		if (!dev_priv->perf.oa.exclusive_stream->ctx ||
		    dev_priv->perf.oa.specific_ctx_id == ctx_id ||
		    (dev_priv->perf.oa.oa_buffer.last_ctx_id ==
		     dev_priv->perf.oa.specific_ctx_id) ||
		    reason & OAREPORT_REASON_CTX_SWITCH) {

			/*
			 * While filtering for a single context we avoid
			 * leaking the IDs of other contexts.
			 */
			if (dev_priv->perf.oa.exclusive_stream->ctx &&
			    dev_priv->perf.oa.specific_ctx_id != ctx_id) {
				report32[2] = INVALID_CTX_ID;
			}

			ret = append_oa_sample(stream, buf, count, offset,
					       report);
			if (ret)
				break;

			dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id;
		}

		/*
		 * The above reason field sanity check is based on
		 * the assumption that the OA buffer is initially
		 * zeroed and we reset the field after copying so the
		 * check is still meaningful once old reports start
		 * being overwritten.
		 */
		report32[0] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

		/*
		 * We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base so put back here...
		 */
		head += gtt_offset;

		I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
		dev_priv->perf.oa.oa_buffer.head = head;

		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
	}

	return ret;
}

/**
 * gen8_oa_read - copy status records then buffered OA reports
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Checks OA unit status registers and if necessary appends corresponding
 * status records for userspace (such as for a buffer full condition) and then
 * initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * NB: some data may be successfully copied to the userspace buffer
 * even if an error is returned, and this is reflected in the
 * updated @offset.
 *
 * Returns: zero on success or a negative error code
 */
static int gen8_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 oastatus;
	int ret;

	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
		return -EIO;

	oastatus = I915_READ(GEN8_OASTATUS);

	/*
	 * We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * Although theoretically we could handle this more gracefully
	 * sometimes, some Gens don't correctly suppress certain
	 * automatically triggered reports in this condition and so we
	 * have to assume that old reports are now being trampled
	 * over.
	 *
	 * Considering how we don't currently give userspace control
	 * over the OA buffer size and always configure a large 16MB
	 * buffer, then a buffer overflow does anyway likely indicate
	 * that something has gone quite badly wrong.
	 */
	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  dev_priv->perf.oa.period_exponent);

		dev_priv->perf.oa.ops.oa_disable(stream);
		dev_priv->perf.oa.ops.oa_enable(stream);

		/*
		 * Note: .oa_enable() is expected to re-init the oabuffer and
		 * reset GEN8_OASTATUS for us
		 */
		oastatus = I915_READ(GEN8_OASTATUS);
	}

	if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		I915_WRITE(GEN8_OASTATUS,
			   oastatus & ~GEN8_OASTATUS_REPORT_LOST);
	}

	return gen8_append_oa_reports(stream, buf, count, offset);
}

/**
 * gen7_append_oa_reports - Copies all buffered OA reports into
 *			    userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Notably any error condition resulting in a short read (-%ENOSPC or
 * -%EFAULT) will be returned even though one or more records may
 * have been successfully copied. In this case it's up to the caller
 * to decide if the error should be squashed before returning to
 * userspace.
 *
 * Note: reports are consumed from the head, and appended to the
 * tail, so the tail chases the head?... If you think that's mad
 * and back-to-front you're not alone, but this follows the
 * Gen PRM naming convention.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen7_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	unsigned int aged_tail_idx;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (WARN_ON(!stream->enabled))
		return -EIO;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	head = dev_priv->perf.oa.oa_buffer.head;
	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/* An invalid tail pointer here means we're still waiting for the poll
	 * hrtimer callback to give us a pointer
	 */
	if (tail == INVALID_TAIL_PTR)
		return -EAGAIN;

	/* NB: oa_buffer.head/tail include the gtt_offset which we don't want
	 * while indexing relative to oa_buf_base.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/* An out of bounds or misaligned head or tail pointer implies a driver
	 * bug since we validate + align the tail pointers we read from the
	 * hardware and we are in full control of the head pointer which should
	 * only be incremented by multiples of the report size (notably also
	 * all a power of two).
	 */
	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
		      tail > OA_BUFFER_SIZE || tail % report_size,
		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
		      head, tail))
		return -EIO;


	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;

		/* All the report sizes factor neatly into the buffer
		 * size so we never expect to see a report split
		 * between the beginning and end of the buffer.
		 *
		 * Given the initial alignment check a misalignment
		 * here would imply a driver bug that would result
		 * in an overrun.
		 */
		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/* The report-ID field for periodic samples includes
		 * some undocumented flags related to what triggered
		 * the report and is never expected to be zero so we
		 * can check that the report isn't invalid before
		 * copying it to userspace...
		 */
		if (report32[0] == 0) {
			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ret = append_oa_sample(stream, buf, count, offset, report);
		if (ret)
			break;

		/* The above report-id field sanity check is based on
		 * the assumption that the OA buffer is initially
		 * zeroed and we reset the field after copying so the
		 * check is still meaningful once old reports start
		 * being overwritten.
		 */
		report32[0] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

		/* We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base so put back here...
		 */
		head += gtt_offset;

		I915_WRITE(GEN7_OASTATUS2,
			   ((head & GEN7_OASTATUS2_HEAD_MASK) |
			    GEN7_OASTATUS2_MEM_SELECT_GGTT));
		dev_priv->perf.oa.oa_buffer.head = head;

		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
	}

	return ret;
}

/**
 * gen7_oa_read - copy status records then buffered OA reports
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Checks Gen 7 specific OA unit status registers and if necessary appends
 * corresponding status records for userspace (such as for a buffer full
 * condition) and then initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * Returns: zero on success or a negative error code
 */
static int gen7_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 oastatus1;
	int ret;

	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
		return -EIO;

	oastatus1 = I915_READ(GEN7_OASTATUS1);

	/* XXX: On Haswell we don't have a safe way to clear oastatus1
	 * bits while the OA unit is enabled (while the tail pointer
	 * may be updated asynchronously) so we ignore status bits
	 * that have already been reported to userspace.
	 */
	oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1;

	/* We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * - The status can be interpreted to mean that the buffer is
	 *   currently full (with a higher precedence than OA_TAKEN()
	 *   which will start to report a near-empty buffer after an
	 *   overflow) but it's awkward that we can't clear the status
	 *   on Haswell, so without a reset we won't be able to catch
	 *   the state again.
	 *
	 * - Since it also implies the HW has started overwriting old
	 *   reports it may also affect our sanity checks for invalid
	 *   reports when copying to userspace that assume new reports
	 *   are being written to cleared memory.
	 *
	 * - In the future we may want to introduce a flight recorder
	 *   mode where the driver will automatically maintain a safe
	 *   guard band between head/tail, avoiding this overflow
	 *   condition, but we avoid the added driver complexity for
	 *   now.
	 */
	if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  dev_priv->perf.oa.period_exponent);

		dev_priv->perf.oa.ops.oa_disable(stream);
		dev_priv->perf.oa.ops.oa_enable(stream);

		oastatus1 = I915_READ(GEN7_OASTATUS1);
	}

	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		dev_priv->perf.oa.gen7_latched_oastatus1 |=
			GEN7_OASTATUS1_REPORT_LOST;
	}

	return gen7_append_oa_reports(stream, buf, count, offset);
}

/**
 * i915_oa_wait_unlocked - handles blocking IO until OA data available
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Called when userspace tries to read() from a blocking stream FD opened
 * for OA metrics. It waits until the hrtimer callback finds a non-empty
 * OA buffer and wakes us.
 *
 * Note: it's acceptable to have this return with some false positives
 * since any subsequent read handling will return -EAGAIN if there isn't
 * really data ready for userspace yet.
 *
 * Returns: zero on success or a negative error code
 */
static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	/* We would wait indefinitely if periodic sampling is not enabled */
	if (!dev_priv->perf.oa.periodic)
		return -EIO;

	return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
					oa_buffer_check_unlocked(dev_priv));
}

/**
 * i915_oa_poll_wait - call poll_wait() for an OA stream poll()
 * @stream: An i915-perf stream opened for OA metrics
 * @file: An i915 perf stream file
 * @wait: poll() state table
 *
 * For handling userspace polling on an i915 perf stream opened for OA metrics,
 * this starts a poll_wait with the wait queue that our hrtimer callback wakes
 * when it sees data ready to read in the circular OA buffer.
 */
static void i915_oa_poll_wait(struct i915_perf_stream *stream,
			      struct file *file,
			      poll_table *wait)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
}

/**
 * i915_oa_read - just calls through to &i915_oa_ops->read
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * Returns: zero on success or a negative error code
 */
static int i915_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
}

static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
					    struct i915_gem_context *ctx)
{
	struct i915_gem_engines_iter it;
	struct intel_context *ce;
	int err;

	err = i915_mutex_lock_interruptible(&i915->drm);
	if (err)
		return ERR_PTR(err);

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		if (ce->engine->class != RENDER_CLASS)
			continue;

		/*
		 * As the ID is the gtt offset of the context's vma we
		 * pin the vma to ensure the ID remains fixed.
		 */
		err = intel_context_pin(ce);
		if (err == 0) {
			i915->perf.oa.pinned_ctx = ce;
			break;
		}
	}
	i915_gem_context_unlock_engines(ctx);

	mutex_unlock(&i915->drm.struct_mutex);
	if (err)
		return ERR_PTR(err);

	return i915->perf.oa.pinned_ctx;
}

/**
 * oa_get_render_ctx_id - determine and hold ctx hw id
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Determine the render context hw id, and ensure it remains fixed for the
 * lifetime of the stream. This ensures that we don't have to worry about
 * updating the context ID in OACONTROL on the fly.
 *
 * Returns: zero on success or a negative error code
 */
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
	struct drm_i915_private *i915 = stream->dev_priv;
	struct intel_context *ce;

	ce = oa_pin_context(i915, stream->ctx);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	switch (INTEL_GEN(i915)) {
	case 7: {
		/*
		 * On Haswell we don't do any post processing of the reports
		 * and don't need to use the mask.
		 */
		i915->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state);
		i915->perf.oa.specific_ctx_id_mask = 0;
		break;
	}

	case 8:
	case 9:
	case 10:
		if (USES_GUC_SUBMISSION(i915)) {
			/*
			 * When using GuC, the context descriptor we write in
			 * i915 is read by GuC and rewritten before it's
			 * actually written into the hardware. The LRCA is
			 * what is put into the context id field of the
			 * context descriptor by GuC. Because it's aligned to
			 * a page, the lower 12bits are always at 0 and
			 * dropped by GuC. They won't be part of the context
			 * ID in the OA reports, so squash those lower bits.
			 */
			i915->perf.oa.specific_ctx_id =
				lower_32_bits(ce->lrc_desc) >> 12;

			/*
			 * GuC uses the top bit to signal proxy submission, so
			 * ignore that bit.
			 */
			i915->perf.oa.specific_ctx_id_mask =
				(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
		} else {
			i915->perf.oa.specific_ctx_id_mask =
				(1U << GEN8_CTX_ID_WIDTH) - 1;
			i915->perf.oa.specific_ctx_id =
				upper_32_bits(ce->lrc_desc);
			i915->perf.oa.specific_ctx_id &=
				i915->perf.oa.specific_ctx_id_mask;
		}
		break;

	case 11: {
		i915->perf.oa.specific_ctx_id_mask =
			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
			((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
			((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
		i915->perf.oa.specific_ctx_id = upper_32_bits(ce->lrc_desc);
		i915->perf.oa.specific_ctx_id &=
			i915->perf.oa.specific_ctx_id_mask;
		break;
	}

	default:
		MISSING_CASE(INTEL_GEN(i915));
	}

	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
			 i915->perf.oa.specific_ctx_id,
			 i915->perf.oa.specific_ctx_id_mask);

	return 0;
}

/**
 * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
 * @stream: An i915-perf stream opened for OA metrics
 *
 * In case anything needed doing to ensure the context HW ID would remain valid
 * for the lifetime of the stream, then that can be undone here.
 */
static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	struct intel_context *ce;

	dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
	dev_priv->perf.oa.specific_ctx_id_mask = 0;

	ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx);
	if (ce) {
		mutex_lock(&dev_priv->drm.struct_mutex);
		intel_context_unpin(ce);
		mutex_unlock(&dev_priv->drm.struct_mutex);
	}
}

static void
free_oa_buffer(struct drm_i915_private *i915)
{
	mutex_lock(&i915->drm.struct_mutex);

	i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma,
				   I915_VMA_RELEASE_MAP);

	mutex_unlock(&i915->drm.struct_mutex);

	i915->perf.oa.oa_buffer.vaddr = NULL;
}

static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);

	/*
	 * Unset exclusive_stream first, it will be checked while disabling
	 * the metric set on gen8+.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	dev_priv->perf.oa.exclusive_stream = NULL;
	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	free_oa_buffer(dev_priv);

	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);

	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	put_oa_config(dev_priv, stream->oa_config);

	if (dev_priv->perf.oa.spurious_report_rs.missed) {
		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
			 dev_priv->perf.oa.spurious_report_rs.missed);
	}
}

static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
{
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/* Pre-DevBDW: OABUFFER must be set with counters off,
	 * before OASTATUS1, but after OASTATUS2
	 */
	I915_WRITE(GEN7_OASTATUS2,
		   gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */
	dev_priv->perf.oa.oa_buffer.head = gtt_offset;

	I915_WRITE(GEN7_OABUFFER, gtt_offset);

	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */

	/* Mark that we need updated tail pointers to read from... */
	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/* On Haswell we have to track which OASTATUS1 flags we've
	 * already seen since they can't be cleared while periodic
	 * sampling is enabled.
	 */
	dev_priv->perf.oa.gen7_latched_oastatus1 = 0;

	/* NB: although the OA buffer will initially be allocated
	 * zeroed via shmfs (and so this memset is redundant when
	 * first allocating), we may re-init the OA buffer, either
	 * when re-enabling a stream or in error/reset paths.
	 *
	 * The reason we clear the buffer for each re-init is for the
	 * sanity check in gen7_append_oa_reports() that looks at the
	 * report-id field to make sure it's non-zero which relies on
	 * the assumption that new reports are being written to zeroed
	 * memory...
	 */
	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);

	/* Maybe make ->pollin per-stream state if we support multiple
	 * concurrent streams in the future.
	 */
	dev_priv->perf.oa.pollin = false;
}

static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
{
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	I915_WRITE(GEN8_OASTATUS, 0);
	I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
	dev_priv->perf.oa.oa_buffer.head = gtt_offset;

	I915_WRITE(GEN8_OABUFFER_UDW, 0);

	/*
	 * PRM says:
	 *
	 *  "This MMIO must be set before the OATAILPTR
	 *  register and after the OAHEADPTR register. This is
	 *  to enable proper functionality of the overflow
	 *  bit."
	 */
	I915_WRITE(GEN8_OABUFFER, gtt_offset |
		   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);

	/* Mark that we need updated tail pointers to read from... */
	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;

	/*
	 * Reset state used to recognise context switches, affecting which
	 * reports we will forward to userspace while filtering for a single
	 * context.
	 */
	dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * NB: although the OA buffer will initially be allocated
	 * zeroed via shmfs (and so this memset is redundant when
	 * first allocating), we may re-init the OA buffer, either
	 * when re-enabling a stream or in error/reset paths.
	 *
	 * The reason we clear the buffer for each re-init is for the
	 * sanity check in gen8_append_oa_reports() that looks at the
	 * reason field to make sure it's non-zero which relies on
	 * the assumption that new reports are being written to zeroed
	 * memory...
	 */
	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);

	/*
	 * Maybe make ->pollin per-stream state if we support multiple
	 * concurrent streams in the future.
	 */
	dev_priv->perf.oa.pollin = false;
}

static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *bo;
	struct i915_vma *vma;
	int ret;

	if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		return ret;

	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);

	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
	if (IS_ERR(bo)) {
		DRM_ERROR("Failed to allocate OA buffer\n");
		ret = PTR_ERR(bo);
		goto unlock;
	}

	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);

	/* PreHSW required 512K alignment, HSW requires 16M */
	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}
	dev_priv->perf.oa.oa_buffer.vma = vma;

	dev_priv->perf.oa.oa_buffer.vaddr =
		i915_gem_object_pin_map(bo, I915_MAP_WB);
	if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
		ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
		goto err_unpin;
	}

	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
			 i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma),
			 dev_priv->perf.oa.oa_buffer.vaddr);

	goto unlock;

err_unpin:
	__i915_vma_unpin(vma);

err_unref:
	i915_gem_object_put(bo);

	dev_priv->perf.oa.oa_buffer.vaddr = NULL;
	dev_priv->perf.oa.oa_buffer.vma = NULL;

unlock:
	mutex_unlock(&dev_priv->drm.struct_mutex);
	return ret;
}

static void config_oa_regs(struct drm_i915_private *dev_priv,
			   const struct i915_oa_reg *regs,
			   u32 n_regs)
{
	u32 i;

	for (i = 0; i < n_regs; i++) {
		const struct i915_oa_reg *reg = regs + i;

		I915_WRITE(reg->addr, reg->value);
	}
}

static int hsw_enable_metric_set(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	const struct i915_oa_config *oa_config = stream->oa_config;

	/* PRM:
	 *
	 * OA unit is using “crclk” for its functionality. When trunk
	 * level clock gating takes place, OA clock would be gated,
	 * unable to count the events from non-render clock domain.
	 * Render clock gating must be disabled when OA is enabled to
	 * count the events from non-render domain. Unit level clock
	 * gating for RCS should also be disabled.
	 */
	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
				    ~GEN7_DOP_CLOCK_GATE_ENABLE));
	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
				  GEN6_CSUNIT_CLOCK_GATE_DISABLE));

	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);

	/* It apparently takes a fairly long time for a new MUX
	 * configuration to be applied after these register writes.
	 * This delay duration was derived empirically based on the
	 * render_basic config but hopefully it covers the maximum
	 * configuration latency.
	 *
	 * As a fallback, the checks in _append_oa_reports() to skip
	 * invalid OA reports do also seem to work to discard reports
	 * generated before this config has completed - albeit not
	 * silently.
	 *
	 * Unfortunately this is essentially a magic number, since we
	 * don't currently know of a reliable mechanism for predicting
	 * how long the MUX config will take to apply and besides
	 * seeing invalid reports we don't know of a reliable way to
	 * explicitly check that the MUX config has landed.
	 *
	 * It's even possible we've mischaracterized the underlying
	 * problem - it just seems like the simplest explanation why
	 * a delay at this location would mitigate any invalid reports.
	 */
	usleep_range(15000, 20000);

	config_oa_regs(dev_priv, oa_config->b_counter_regs,
		       oa_config->b_counter_regs_len);

	return 0;
}

static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
{
	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
				  ~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
				    GEN7_DOP_CLOCK_GATE_ENABLE));

	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
				      ~GT_NOA_ENABLE));
}

/*
 * NB: It must always remain pointer safe to run this even if the OA unit
 * has been disabled.
 *
 * It's fine to put out-of-date values into these per-context registers
 * in the case that the OA unit has been disabled.
 */
static void
gen8_update_reg_state_unlocked(struct intel_context *ce,
			       u32 *reg_state,
			       const struct i915_oa_config *oa_config)
{
	struct drm_i915_private *i915 = ce->gem_context->i915;
	u32 ctx_oactxctrl = i915->perf.oa.ctx_oactxctrl_offset;
	u32 ctx_flexeu0 = i915->perf.oa.ctx_flexeu0_offset;
	/* The MMIO offsets for Flex EU registers aren't contiguous */
	i915_reg_t flex_regs[] = {
		EU_PERF_CNTL0,
		EU_PERF_CNTL1,
		EU_PERF_CNTL2,
		EU_PERF_CNTL3,
		EU_PERF_CNTL4,
		EU_PERF_CNTL5,
		EU_PERF_CNTL6,
	};
	int i;

	CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
		(i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
		(i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
		GEN8_OA_COUNTER_RESUME);

	for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
		u32 state_offset = ctx_flexeu0 + i * 2;
		u32 mmio = i915_mmio_reg_offset(flex_regs[i]);

		/*
		 * This arbitrary default will select the 'EU FPU0 Pipeline
		 * Active' event. In the future it's anticipated that there
		 * will be an explicit 'No Event' we can select, but not yet...
		 */
		u32 value = 0;

		if (oa_config) {
			u32 j;

			for (j = 0; j < oa_config->flex_regs_len; j++) {
				if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
					value = oa_config->flex_regs[j].value;
					break;
				}
			}
		}

		CTX_REG(reg_state, state_offset, flex_regs[i], value);
	}

	CTX_REG(reg_state,
		CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
		intel_sseu_make_rpcs(i915, &ce->sseu));
}

/*
 * Manages updating the per-context aspects of the OA stream
 * configuration across all contexts.
 *
 * The awkward consideration here is that OACTXCONTROL controls the
 * exponent for periodic sampling which is primarily used for system
 * wide profiling where we'd like a consistent sampling period even in
 * the face of context switches.
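 *
 * As a rough illustration of what that exponent means (a sketch only; the
 * exact timestamp frequency is per-platform and comes from
 * RUNTIME_INFO()->cs_timestamp_frequency_khz, the 12MHz figure below is
 * just an assumed example, not a claim about any particular platform):
 *
 *   period_ns = ((2 << exponent) * NSEC_PER_SEC) /
 *               (cs_timestamp_frequency_khz * 1000)
 *
 *   e.g. exponent == 12 with an assumed 12MHz timestamp clock gives
 *   (2 << 12) / 12000000 seconds, i.e. ~683us between periodic reports.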
1703 * 1704 * Our approach of updating the register state context (as opposed to 1705 * say using a workaround batch buffer) ensures that the hardware 1706 * won't automatically reload an out-of-date timer exponent even 1707 * transiently before a WA BB could be parsed. 1708 * 1709 * This function needs to: 1710 * - Ensure the currently running context's per-context OA state is 1711 * updated 1712 * - Ensure that all existing contexts will have the correct per-context 1713 * OA state if they are scheduled for use. 1714 * - Ensure any new contexts will be initialized with the correct 1715 * per-context OA state. 1716 * 1717 * Note: it's only the RCS/Render context that has any OA state. 1718 */ 1719 static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, 1720 const struct i915_oa_config *oa_config) 1721 { 1722 unsigned int map_type = i915_coherent_map_type(dev_priv); 1723 struct i915_gem_context *ctx; 1724 struct i915_request *rq; 1725 int ret; 1726 1727 lockdep_assert_held(&dev_priv->drm.struct_mutex); 1728 1729 /* 1730 * The OA register config is setup through the context image. This image 1731 * might be written to by the GPU on context switch (in particular on 1732 * lite-restore). This means we can't safely update a context's image, 1733 * if this context is scheduled/submitted to run on the GPU. 1734 * 1735 * We could emit the OA register config through the batch buffer but 1736 * this might leave small interval of time where the OA unit is 1737 * configured at an invalid sampling period. 1738 * 1739 * So far the best way to work around this issue seems to be draining 1740 * the GPU from any submitted work. 1741 */ 1742 ret = i915_gem_wait_for_idle(dev_priv, 1743 I915_WAIT_LOCKED, 1744 MAX_SCHEDULE_TIMEOUT); 1745 if (ret) 1746 return ret; 1747 1748 /* Update all contexts now that we've stalled the submission. */ 1749 list_for_each_entry(ctx, &dev_priv->contexts.list, link) { 1750 struct i915_gem_engines_iter it; 1751 struct intel_context *ce; 1752 1753 for_each_gem_engine(ce, 1754 i915_gem_context_lock_engines(ctx), 1755 it) { 1756 u32 *regs; 1757 1758 if (ce->engine->class != RENDER_CLASS) 1759 continue; 1760 1761 /* OA settings will be set upon first use */ 1762 if (!ce->state) 1763 continue; 1764 1765 regs = i915_gem_object_pin_map(ce->state->obj, 1766 map_type); 1767 if (IS_ERR(regs)) { 1768 i915_gem_context_unlock_engines(ctx); 1769 return PTR_ERR(regs); 1770 } 1771 1772 ce->state->obj->mm.dirty = true; 1773 regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); 1774 1775 gen8_update_reg_state_unlocked(ce, regs, oa_config); 1776 1777 i915_gem_object_unpin_map(ce->state->obj); 1778 } 1779 i915_gem_context_unlock_engines(ctx); 1780 } 1781 1782 /* 1783 * Apply the configuration by doing one context restore of the edited 1784 * context image. 1785 */ 1786 rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context); 1787 if (IS_ERR(rq)) 1788 return PTR_ERR(rq); 1789 1790 i915_request_add(rq); 1791 1792 return 0; 1793 } 1794 1795 static int gen8_enable_metric_set(struct i915_perf_stream *stream) 1796 { 1797 struct drm_i915_private *dev_priv = stream->dev_priv; 1798 const struct i915_oa_config *oa_config = stream->oa_config; 1799 int ret; 1800 1801 /* 1802 * We disable slice/unslice clock ratio change reports on SKL since 1803 * they are too noisy. The HW generates a lot of redundant reports 1804 * where the ratio hasn't really changed causing a lot of redundant 1805 * work to processes and increasing the chances we'll hit buffer 1806 * overruns. 
	 *
	 * Although we don't currently use the 'disable overrun' OABUFFER
	 * feature it's worth noting that clock ratio reports have to be
	 * disabled before considering to use that feature since the HW doesn't
	 * correctly block these reports.
	 *
	 * Currently none of the high-level metrics we have depend on knowing
	 * this ratio to normalize.
	 *
	 * Note: This register is not power context saved and restored, but
	 * that's OK considering that we disable RC6 while the OA unit is
	 * enabled.
	 *
	 * The _INCLUDE_CLK_RATIO bit allows the slice/unslice frequency to
	 * be read back from automatically triggered reports, as part of the
	 * RPT_ID field.
	 */
	if (IS_GEN_RANGE(dev_priv, 9, 11)) {
		I915_WRITE(GEN8_OA_DEBUG,
			   _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
					      GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
	}

	/*
	 * Update all contexts prior to writing the mux configurations as we
	 * need to make sure all slices/subslices are ON before writing to NOA
	 * registers.
	 */
	ret = gen8_configure_all_contexts(dev_priv, oa_config);
	if (ret)
		return ret;

	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);

	config_oa_regs(dev_priv, oa_config->b_counter_regs,
		       oa_config->b_counter_regs_len);

	return 0;
}

static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
{
	/* Reset all contexts' slices/subslices configurations. */
	gen8_configure_all_contexts(dev_priv, NULL);

	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
				      ~GT_NOA_ENABLE));
}

static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
{
	/* Reset all contexts' slices/subslices configurations. */
	gen8_configure_all_contexts(dev_priv, NULL);

	/* Make sure we disable noa to save power. */
	I915_WRITE(RPM_CONFIG1,
		   I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE);
}

static void gen7_oa_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	struct i915_gem_context *ctx = stream->ctx;
	u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;
	bool periodic = dev_priv->perf.oa.periodic;
	u32 period_exponent = dev_priv->perf.oa.period_exponent;
	u32 report_format = dev_priv->perf.oa.oa_buffer.format;

	/*
	 * Reset buf pointers so we don't forward reports from before now.
	 *
	 * Think carefully if considering trying to avoid this, since it
	 * also ensures status flags and the buffer itself are cleared
	 * in error paths, and we have checks for invalid reports based
	 * on the assumption that certain fields are written to zeroed
	 * memory which this helps maintain.
	 */
	gen7_init_oa_buffer(dev_priv);

	I915_WRITE(GEN7_OACONTROL,
		   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
		   (period_exponent <<
		    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
		   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
		   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
		   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
		   GEN7_OACONTROL_ENABLE);
}

static void gen8_oa_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 report_format = dev_priv->perf.oa.oa_buffer.format;

	/*
	 * Reset buf pointers so we don't forward reports from before now.
	 *
	 * Think carefully if considering trying to avoid this, since it
	 * also ensures status flags and the buffer itself are cleared
	 * in error paths, and we have checks for invalid reports based
	 * on the assumption that certain fields are written to zeroed
	 * memory which this helps maintain.
	 */
	gen8_init_oa_buffer(dev_priv);

	/*
	 * Note: we don't rely on the hardware to perform single context
	 * filtering and instead filter on the cpu based on the context-id
	 * field of reports.
	 */
	I915_WRITE(GEN8_OACONTROL, (report_format <<
				    GEN8_OA_REPORT_FORMAT_SHIFT) |
				   GEN8_OA_COUNTER_ENABLE);
}

/**
 * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream
 * @stream: An i915 perf stream opened for OA metrics
 *
 * [Re]enables hardware periodic sampling according to the period configured
 * when opening the stream. This also starts a hrtimer that will periodically
 * check for data in the circular OA buffer for notifying userspace (e.g.
 * during a read() or poll()).
 */
static void i915_oa_stream_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	dev_priv->perf.oa.ops.oa_enable(stream);

	if (dev_priv->perf.oa.periodic)
		hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
			      ns_to_ktime(POLL_PERIOD),
			      HRTIMER_MODE_REL_PINNED);
}

static void gen7_oa_disable(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = &stream->dev_priv->uncore;

	intel_uncore_write(uncore, GEN7_OACONTROL, 0);
	if (intel_wait_for_register(uncore,
				    GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0,
				    50))
		DRM_ERROR("wait for OA to be disabled timed out\n");
}

static void gen8_oa_disable(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = &stream->dev_priv->uncore;

	intel_uncore_write(uncore, GEN8_OACONTROL, 0);
	if (intel_wait_for_register(uncore,
				    GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0,
				    50))
		DRM_ERROR("wait for OA to be disabled timed out\n");
}

/**
 * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream
 * @stream: An i915 perf stream opened for OA metrics
 *
 * Stops the OA unit from periodically writing counter reports into the
 * circular OA buffer. This also stops the hrtimer that periodically checks for
 * data in the circular OA buffer, for notifying userspace.
1972 */ 1973 static void i915_oa_stream_disable(struct i915_perf_stream *stream) 1974 { 1975 struct drm_i915_private *dev_priv = stream->dev_priv; 1976 1977 dev_priv->perf.oa.ops.oa_disable(stream); 1978 1979 if (dev_priv->perf.oa.periodic) 1980 hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer); 1981 } 1982 1983 static const struct i915_perf_stream_ops i915_oa_stream_ops = { 1984 .destroy = i915_oa_stream_destroy, 1985 .enable = i915_oa_stream_enable, 1986 .disable = i915_oa_stream_disable, 1987 .wait_unlocked = i915_oa_wait_unlocked, 1988 .poll_wait = i915_oa_poll_wait, 1989 .read = i915_oa_read, 1990 }; 1991 1992 /** 1993 * i915_oa_stream_init - validate combined props for OA stream and init 1994 * @stream: An i915 perf stream 1995 * @param: The open parameters passed to `DRM_I915_PERF_OPEN` 1996 * @props: The property state that configures stream (individually validated) 1997 * 1998 * While read_properties_unlocked() validates properties in isolation it 1999 * doesn't ensure that the combination necessarily makes sense. 2000 * 2001 * At this point it has been determined that userspace wants a stream of 2002 * OA metrics, but still we need to further validate the combined 2003 * properties are OK. 2004 * 2005 * If the configuration makes sense then we can allocate memory for 2006 * a circular OA buffer and apply the requested metric set configuration. 2007 * 2008 * Returns: zero on success or a negative error code. 2009 */ 2010 static int i915_oa_stream_init(struct i915_perf_stream *stream, 2011 struct drm_i915_perf_open_param *param, 2012 struct perf_open_properties *props) 2013 { 2014 struct drm_i915_private *dev_priv = stream->dev_priv; 2015 int format_size; 2016 int ret; 2017 2018 /* If the sysfs metrics/ directory wasn't registered for some 2019 * reason then don't let userspace try their luck with config 2020 * IDs 2021 */ 2022 if (!dev_priv->perf.metrics_kobj) { 2023 DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); 2024 return -EINVAL; 2025 } 2026 2027 if (!(props->sample_flags & SAMPLE_OA_REPORT)) { 2028 DRM_DEBUG("Only OA report sampling supported\n"); 2029 return -EINVAL; 2030 } 2031 2032 if (!dev_priv->perf.oa.ops.enable_metric_set) { 2033 DRM_DEBUG("OA unit not supported\n"); 2034 return -ENODEV; 2035 } 2036 2037 /* To avoid the complexity of having to accurately filter 2038 * counter reports and marshal to the appropriate client 2039 * we currently only allow exclusive access 2040 */ 2041 if (dev_priv->perf.oa.exclusive_stream) { 2042 DRM_DEBUG("OA unit already in use\n"); 2043 return -EBUSY; 2044 } 2045 2046 if (!props->oa_format) { 2047 DRM_DEBUG("OA report format not specified\n"); 2048 return -EINVAL; 2049 } 2050 2051 /* We set up some ratelimit state to potentially throttle any _NOTES 2052 * about spurious, invalid OA reports which we don't forward to 2053 * userspace. 2054 * 2055 * The initialization is associated with opening the stream (not driver 2056 * init) considering we print a _NOTE about any throttling when closing 2057 * the stream instead of waiting until driver _fini which no one would 2058 * ever see. 2059 * 2060 * Using the same limiting factors as printk_ratelimit() 2061 */ 2062 ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs, 2063 5 * HZ, 10); 2064 /* Since we use a DRM_NOTE for spurious reports it would be 2065 * inconsistent to let __ratelimit() automatically print a warning for 2066 * throttling. 
2067 */ 2068 ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs, 2069 RATELIMIT_MSG_ON_RELEASE); 2070 2071 stream->sample_size = sizeof(struct drm_i915_perf_record_header); 2072 2073 format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size; 2074 2075 stream->sample_flags |= SAMPLE_OA_REPORT; 2076 stream->sample_size += format_size; 2077 2078 dev_priv->perf.oa.oa_buffer.format_size = format_size; 2079 if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0)) 2080 return -EINVAL; 2081 2082 dev_priv->perf.oa.oa_buffer.format = 2083 dev_priv->perf.oa.oa_formats[props->oa_format].format; 2084 2085 dev_priv->perf.oa.periodic = props->oa_periodic; 2086 if (dev_priv->perf.oa.periodic) 2087 dev_priv->perf.oa.period_exponent = props->oa_period_exponent; 2088 2089 if (stream->ctx) { 2090 ret = oa_get_render_ctx_id(stream); 2091 if (ret) { 2092 DRM_DEBUG("Invalid context id to filter with\n"); 2093 return ret; 2094 } 2095 } 2096 2097 ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config); 2098 if (ret) { 2099 DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); 2100 goto err_config; 2101 } 2102 2103 /* PRM - observability performance counters: 2104 * 2105 * OACONTROL, performance counter enable, note: 2106 * 2107 * "When this bit is set, in order to have coherent counts, 2108 * RC6 power state and trunk clock gating must be disabled. 2109 * This can be achieved by programming MMIO registers as 2110 * 0xA094=0 and 0xA090[31]=1" 2111 * 2112 * In our case we are expecting that taking pm + FORCEWAKE 2113 * references will effectively disable RC6. 2114 */ 2115 stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); 2116 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); 2117 2118 ret = alloc_oa_buffer(dev_priv); 2119 if (ret) 2120 goto err_oa_buf_alloc; 2121 2122 ret = i915_mutex_lock_interruptible(&dev_priv->drm); 2123 if (ret) 2124 goto err_lock; 2125 2126 stream->ops = &i915_oa_stream_ops; 2127 dev_priv->perf.oa.exclusive_stream = stream; 2128 2129 ret = dev_priv->perf.oa.ops.enable_metric_set(stream); 2130 if (ret) { 2131 DRM_DEBUG("Unable to enable metric set\n"); 2132 goto err_enable; 2133 } 2134 2135 mutex_unlock(&dev_priv->drm.struct_mutex); 2136 2137 return 0; 2138 2139 err_enable: 2140 dev_priv->perf.oa.exclusive_stream = NULL; 2141 dev_priv->perf.oa.ops.disable_metric_set(dev_priv); 2142 mutex_unlock(&dev_priv->drm.struct_mutex); 2143 2144 err_lock: 2145 free_oa_buffer(dev_priv); 2146 2147 err_oa_buf_alloc: 2148 put_oa_config(dev_priv, stream->oa_config); 2149 2150 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); 2151 intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); 2152 2153 err_config: 2154 if (stream->ctx) 2155 oa_put_render_ctx_id(stream); 2156 2157 return ret; 2158 } 2159 2160 void i915_oa_init_reg_state(struct intel_engine_cs *engine, 2161 struct intel_context *ce, 2162 u32 *regs) 2163 { 2164 struct i915_perf_stream *stream; 2165 2166 if (engine->class != RENDER_CLASS) 2167 return; 2168 2169 stream = engine->i915->perf.oa.exclusive_stream; 2170 if (stream) 2171 gen8_update_reg_state_unlocked(ce, regs, stream->oa_config); 2172 } 2173 2174 /** 2175 * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation 2176 * @stream: An i915 perf stream 2177 * @file: An i915 perf stream file 2178 * @buf: destination buffer given by userspace 2179 * @count: the number of bytes userspace wants to read 2180 * @ppos: (inout) file seek position (unused) 2181 * 2182 * Besides wrapping 
&i915_perf_stream_ops->read this provides a common place to 2183 * ensure that if we've successfully copied any data then reporting that takes 2184 * precedence over any internal error status, so the data isn't lost. 2185 * 2186 * For example ret will be -ENOSPC whenever there is more buffered data than 2187 * can be copied to userspace, but that's only interesting if we weren't able 2188 * to copy some data because it implies the userspace buffer is too small to 2189 * receive a single record (and we never split records). 2190 * 2191 * Another case with ret == -EFAULT is more of a grey area since it would seem 2192 * like bad form for userspace to ask us to overrun its buffer, but the user 2193 * knows best: 2194 * 2195 * http://yarchive.net/comp/linux/partial_reads_writes.html 2196 * 2197 * Returns: The number of bytes copied or a negative error code on failure. 2198 */ 2199 static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream, 2200 struct file *file, 2201 char __user *buf, 2202 size_t count, 2203 loff_t *ppos) 2204 { 2205 /* Note we keep the offset (aka bytes read) separate from any 2206 * error status so that the final check for whether we return 2207 * the bytes read with a higher precedence than any error (see 2208 * comment below) doesn't need to be handled/duplicated in 2209 * stream->ops->read() implementations. 2210 */ 2211 size_t offset = 0; 2212 int ret = stream->ops->read(stream, buf, count, &offset); 2213 2214 return offset ?: (ret ?: -EAGAIN); 2215 } 2216 2217 /** 2218 * i915_perf_read - handles read() FOP for i915 perf stream FDs 2219 * @file: An i915 perf stream file 2220 * @buf: destination buffer given by userspace 2221 * @count: the number of bytes userspace wants to read 2222 * @ppos: (inout) file seek position (unused) 2223 * 2224 * The entry point for handling a read() on a stream file descriptor from 2225 * userspace. Most of the work is left to the i915_perf_read_locked() and 2226 * &i915_perf_stream_ops->read but to save having stream implementations (of 2227 * which we might have multiple later) we handle blocking read here. 2228 * 2229 * We can also consistently treat trying to read from a disabled stream 2230 * as an IO error so implementations can assume the stream is enabled 2231 * while reading. 2232 * 2233 * Returns: The number of bytes copied or a negative error code on failure. 2234 */ 2235 static ssize_t i915_perf_read(struct file *file, 2236 char __user *buf, 2237 size_t count, 2238 loff_t *ppos) 2239 { 2240 struct i915_perf_stream *stream = file->private_data; 2241 struct drm_i915_private *dev_priv = stream->dev_priv; 2242 ssize_t ret; 2243 2244 /* To ensure it's handled consistently we simply treat all reads of a 2245 * disabled stream as an error. In particular it might otherwise lead 2246 * to a deadlock for blocking file descriptors... 2247 */ 2248 if (!stream->enabled) 2249 return -EIO; 2250 2251 if (!(file->f_flags & O_NONBLOCK)) { 2252 /* There's the small chance of false positives from 2253 * stream->ops->wait_unlocked. 2254 * 2255 * E.g. 
with single context filtering since we only wait until 2256 * oabuffer has >= 1 report we don't immediately know whether 2257 * any reports really belong to the current context 2258 */ 2259 do { 2260 ret = stream->ops->wait_unlocked(stream); 2261 if (ret) 2262 return ret; 2263 2264 mutex_lock(&dev_priv->perf.lock); 2265 ret = i915_perf_read_locked(stream, file, 2266 buf, count, ppos); 2267 mutex_unlock(&dev_priv->perf.lock); 2268 } while (ret == -EAGAIN); 2269 } else { 2270 mutex_lock(&dev_priv->perf.lock); 2271 ret = i915_perf_read_locked(stream, file, buf, count, ppos); 2272 mutex_unlock(&dev_priv->perf.lock); 2273 } 2274 2275 /* We allow the poll checking to sometimes report false positive EPOLLIN 2276 * events where we might actually report EAGAIN on read() if there's 2277 * not really any data available. In this situation though we don't 2278 * want to enter a busy loop between poll() reporting a EPOLLIN event 2279 * and read() returning -EAGAIN. Clearing the oa.pollin state here 2280 * effectively ensures we back off until the next hrtimer callback 2281 * before reporting another EPOLLIN event. 2282 */ 2283 if (ret >= 0 || ret == -EAGAIN) { 2284 /* Maybe make ->pollin per-stream state if we support multiple 2285 * concurrent streams in the future. 2286 */ 2287 dev_priv->perf.oa.pollin = false; 2288 } 2289 2290 return ret; 2291 } 2292 2293 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) 2294 { 2295 struct drm_i915_private *dev_priv = 2296 container_of(hrtimer, typeof(*dev_priv), 2297 perf.oa.poll_check_timer); 2298 2299 if (oa_buffer_check_unlocked(dev_priv)) { 2300 dev_priv->perf.oa.pollin = true; 2301 wake_up(&dev_priv->perf.oa.poll_wq); 2302 } 2303 2304 hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD)); 2305 2306 return HRTIMER_RESTART; 2307 } 2308 2309 /** 2310 * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream 2311 * @dev_priv: i915 device instance 2312 * @stream: An i915 perf stream 2313 * @file: An i915 perf stream file 2314 * @wait: poll() state table 2315 * 2316 * For handling userspace polling on an i915 perf stream, this calls through to 2317 * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that 2318 * will be woken for new stream data. 2319 * 2320 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize 2321 * with any non-file-operation driver hooks. 2322 * 2323 * Returns: any poll events that are ready without sleeping 2324 */ 2325 static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, 2326 struct i915_perf_stream *stream, 2327 struct file *file, 2328 poll_table *wait) 2329 { 2330 __poll_t events = 0; 2331 2332 stream->ops->poll_wait(stream, file, wait); 2333 2334 /* Note: we don't explicitly check whether there's something to read 2335 * here since this path may be very hot depending on what else 2336 * userspace is polling, or on the timeout in use. We rely solely on 2337 * the hrtimer/oa_poll_check_timer_cb to notify us when there are 2338 * samples to read. 2339 */ 2340 if (dev_priv->perf.oa.pollin) 2341 events |= EPOLLIN; 2342 2343 return events; 2344 } 2345 2346 /** 2347 * i915_perf_poll - call poll_wait() with a suitable wait queue for stream 2348 * @file: An i915 perf stream file 2349 * @wait: poll() state table 2350 * 2351 * For handling userspace polling on an i915 perf stream, this ensures 2352 * poll_wait() gets called with a wait queue that will be woken for new stream 2353 * data. 
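 *
 * For illustration, a rough (untested) sketch of the expected userspace
 * loop, where stream_fd is assumed to be a stream returned by
 * DRM_IOCTL_I915_PERF_OPEN, buf is large enough for at least one sample
 * record and parse_records() stands in for whatever record handling the
 * application does:
 *
 *   struct pollfd pfd = { .fd = stream_fd, .events = POLLIN };
 *
 *   while (poll(&pfd, 1, -1) >= 0 && (pfd.revents & POLLIN)) {
 *           ssize_t n = read(stream_fd, buf, buf_size);
 *
 *           if (n > 0)
 *                   parse_records(buf, n);
 *           else if (n < 0 && errno != EAGAIN)
 *                   break;
 *   }
 *
 * where each record read starts with a struct drm_i915_perf_record_header,
 * followed by the raw OA report when OA sampling was requested.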
 *
 * Note: Implementation deferred to i915_perf_poll_locked()
 *
 * Returns: any poll events that are ready without sleeping
 */
static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	__poll_t ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_poll_locked(dev_priv, stream, file, wait);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}

/**
 * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
 * @stream: A disabled i915 perf stream
 *
 * [Re]enables the associated capture of data for this stream.
 *
 * If a stream was previously enabled then there's currently no intention
 * to provide userspace any guarantee about the preservation of previously
 * buffered data.
 */
static void i915_perf_enable_locked(struct i915_perf_stream *stream)
{
	if (stream->enabled)
		return;

	/* Allow stream->ops->enable() to refer to this */
	stream->enabled = true;

	if (stream->ops->enable)
		stream->ops->enable(stream);
}

/**
 * i915_perf_disable_locked - handle `I915_PERF_IOCTL_DISABLE` ioctl
 * @stream: An enabled i915 perf stream
 *
 * Disables the associated capture of data for this stream.
 *
 * The intention is that disabling and re-enabling a stream will ideally be
 * cheaper than destroying and re-opening a stream with the same configuration,
 * though there are no formal guarantees about what state or buffered data
 * must be retained between disabling and re-enabling a stream.
 *
 * Note: while a stream is disabled it's considered an error for userspace
 * to attempt to read from the stream (-EIO).
 */
static void i915_perf_disable_locked(struct i915_perf_stream *stream)
{
	if (!stream->enabled)
		return;

	/* Allow stream->ops->disable() to refer to this */
	stream->enabled = false;

	if (stream->ops->disable)
		stream->ops->disable(stream);
}

/**
 * i915_perf_ioctl_locked - support ioctl() usage with i915 perf stream FDs
 * @stream: An i915 perf stream
 * @cmd: the ioctl request
 * @arg: the ioctl data
 *
 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
 * with any non-file-operation driver hooks.
 *
 * Returns: zero on success or a negative error code. Returns -EINVAL for
 * an unknown ioctl request.
 */
static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
				   unsigned int cmd,
				   unsigned long arg)
{
	switch (cmd) {
	case I915_PERF_IOCTL_ENABLE:
		i915_perf_enable_locked(stream);
		return 0;
	case I915_PERF_IOCTL_DISABLE:
		i915_perf_disable_locked(stream);
		return 0;
	}

	return -EINVAL;
}

/**
 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
 * @file: An i915 perf stream file
 * @cmd: the ioctl request
 * @arg: the ioctl data
 *
 * Implementation deferred to i915_perf_ioctl_locked().
 *
 * Returns: zero on success or a negative error code. Returns -EINVAL for
 * an unknown ioctl request.
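 *
 * For reference, from userspace these requests are plain ioctl()s on the
 * stream fd (an illustrative sketch, not a tested example):
 *
 *   ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);
 *   ... pause processing, possibly tweak application state ...
 *   ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);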
2458 */ 2459 static long i915_perf_ioctl(struct file *file, 2460 unsigned int cmd, 2461 unsigned long arg) 2462 { 2463 struct i915_perf_stream *stream = file->private_data; 2464 struct drm_i915_private *dev_priv = stream->dev_priv; 2465 long ret; 2466 2467 mutex_lock(&dev_priv->perf.lock); 2468 ret = i915_perf_ioctl_locked(stream, cmd, arg); 2469 mutex_unlock(&dev_priv->perf.lock); 2470 2471 return ret; 2472 } 2473 2474 /** 2475 * i915_perf_destroy_locked - destroy an i915 perf stream 2476 * @stream: An i915 perf stream 2477 * 2478 * Frees all resources associated with the given i915 perf @stream, disabling 2479 * any associated data capture in the process. 2480 * 2481 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize 2482 * with any non-file-operation driver hooks. 2483 */ 2484 static void i915_perf_destroy_locked(struct i915_perf_stream *stream) 2485 { 2486 if (stream->enabled) 2487 i915_perf_disable_locked(stream); 2488 2489 if (stream->ops->destroy) 2490 stream->ops->destroy(stream); 2491 2492 list_del(&stream->link); 2493 2494 if (stream->ctx) 2495 i915_gem_context_put(stream->ctx); 2496 2497 kfree(stream); 2498 } 2499 2500 /** 2501 * i915_perf_release - handles userspace close() of a stream file 2502 * @inode: anonymous inode associated with file 2503 * @file: An i915 perf stream file 2504 * 2505 * Cleans up any resources associated with an open i915 perf stream file. 2506 * 2507 * NB: close() can't really fail from the userspace point of view. 2508 * 2509 * Returns: zero on success or a negative error code. 2510 */ 2511 static int i915_perf_release(struct inode *inode, struct file *file) 2512 { 2513 struct i915_perf_stream *stream = file->private_data; 2514 struct drm_i915_private *dev_priv = stream->dev_priv; 2515 2516 mutex_lock(&dev_priv->perf.lock); 2517 i915_perf_destroy_locked(stream); 2518 mutex_unlock(&dev_priv->perf.lock); 2519 2520 return 0; 2521 } 2522 2523 2524 static const struct file_operations fops = { 2525 .owner = THIS_MODULE, 2526 .llseek = no_llseek, 2527 .release = i915_perf_release, 2528 .poll = i915_perf_poll, 2529 .read = i915_perf_read, 2530 .unlocked_ioctl = i915_perf_ioctl, 2531 /* Our ioctl have no arguments, so it's safe to use the same function 2532 * to handle 32bits compatibility. 2533 */ 2534 .compat_ioctl = i915_perf_ioctl, 2535 }; 2536 2537 2538 /** 2539 * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD 2540 * @dev_priv: i915 device instance 2541 * @param: The open parameters passed to 'DRM_I915_PERF_OPEN` 2542 * @props: individually validated u64 property value pairs 2543 * @file: drm file 2544 * 2545 * See i915_perf_ioctl_open() for interface details. 2546 * 2547 * Implements further stream config validation and stream initialization on 2548 * behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex 2549 * taken to serialize with any non-file-operation driver hooks. 2550 * 2551 * Note: at this point the @props have only been validated in isolation and 2552 * it's still necessary to validate that the combination of properties makes 2553 * sense. 2554 * 2555 * In the case where userspace is interested in OA unit metrics then further 2556 * config validation and stream initialization details will be handled by 2557 * i915_oa_stream_init(). The code here should only validate config state that 2558 * will be relevant to all stream types / backends. 2559 * 2560 * Returns: zero on success or a negative error code. 
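 *
 * Note: opening a stream that exposes system wide metrics is a privileged
 * operation; without CAP_SYS_ADMIN it is only allowed once the administrator
 * has relaxed the dev.i915.perf_stream_paranoid sysctl (e.g. with
 * "sysctl dev.i915.perf_stream_paranoid=0"), mirroring the checks below.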
2561 */ 2562 static int 2563 i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, 2564 struct drm_i915_perf_open_param *param, 2565 struct perf_open_properties *props, 2566 struct drm_file *file) 2567 { 2568 struct i915_gem_context *specific_ctx = NULL; 2569 struct i915_perf_stream *stream = NULL; 2570 unsigned long f_flags = 0; 2571 bool privileged_op = true; 2572 int stream_fd; 2573 int ret; 2574 2575 if (props->single_context) { 2576 u32 ctx_handle = props->ctx_handle; 2577 struct drm_i915_file_private *file_priv = file->driver_priv; 2578 2579 specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle); 2580 if (!specific_ctx) { 2581 DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n", 2582 ctx_handle); 2583 ret = -ENOENT; 2584 goto err; 2585 } 2586 } 2587 2588 /* 2589 * On Haswell the OA unit supports clock gating off for a specific 2590 * context and in this mode there's no visibility of metrics for the 2591 * rest of the system, which we consider acceptable for a 2592 * non-privileged client. 2593 * 2594 * For Gen8+ the OA unit no longer supports clock gating off for a 2595 * specific context and the kernel can't securely stop the counters 2596 * from updating as system-wide / global values. Even though we can 2597 * filter reports based on the included context ID we can't block 2598 * clients from seeing the raw / global counter values via 2599 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to 2600 * enable the OA unit by default. 2601 */ 2602 if (IS_HASWELL(dev_priv) && specific_ctx) 2603 privileged_op = false; 2604 2605 /* Similar to perf's kernel.perf_paranoid_cpu sysctl option 2606 * we check a dev.i915.perf_stream_paranoid sysctl option 2607 * to determine if it's ok to access system wide OA counters 2608 * without CAP_SYS_ADMIN privileges. 2609 */ 2610 if (privileged_op && 2611 i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { 2612 DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n"); 2613 ret = -EACCES; 2614 goto err_ctx; 2615 } 2616 2617 stream = kzalloc(sizeof(*stream), GFP_KERNEL); 2618 if (!stream) { 2619 ret = -ENOMEM; 2620 goto err_ctx; 2621 } 2622 2623 stream->dev_priv = dev_priv; 2624 stream->ctx = specific_ctx; 2625 2626 ret = i915_oa_stream_init(stream, param, props); 2627 if (ret) 2628 goto err_alloc; 2629 2630 /* we avoid simply assigning stream->sample_flags = props->sample_flags 2631 * to have _stream_init check the combination of sample flags more 2632 * thoroughly, but still this is the expected result at this point. 
2633 */ 2634 if (WARN_ON(stream->sample_flags != props->sample_flags)) { 2635 ret = -ENODEV; 2636 goto err_flags; 2637 } 2638 2639 list_add(&stream->link, &dev_priv->perf.streams); 2640 2641 if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) 2642 f_flags |= O_CLOEXEC; 2643 if (param->flags & I915_PERF_FLAG_FD_NONBLOCK) 2644 f_flags |= O_NONBLOCK; 2645 2646 stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags); 2647 if (stream_fd < 0) { 2648 ret = stream_fd; 2649 goto err_open; 2650 } 2651 2652 if (!(param->flags & I915_PERF_FLAG_DISABLED)) 2653 i915_perf_enable_locked(stream); 2654 2655 return stream_fd; 2656 2657 err_open: 2658 list_del(&stream->link); 2659 err_flags: 2660 if (stream->ops->destroy) 2661 stream->ops->destroy(stream); 2662 err_alloc: 2663 kfree(stream); 2664 err_ctx: 2665 if (specific_ctx) 2666 i915_gem_context_put(specific_ctx); 2667 err: 2668 return ret; 2669 } 2670 2671 static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) 2672 { 2673 return div64_u64(1000000000ULL * (2ULL << exponent), 2674 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz); 2675 } 2676 2677 /** 2678 * read_properties_unlocked - validate + copy userspace stream open properties 2679 * @dev_priv: i915 device instance 2680 * @uprops: The array of u64 key value pairs given by userspace 2681 * @n_props: The number of key value pairs expected in @uprops 2682 * @props: The stream configuration built up while validating properties 2683 * 2684 * Note this function only validates properties in isolation it doesn't 2685 * validate that the combination of properties makes sense or that all 2686 * properties necessary for a particular kind of stream have been set. 2687 * 2688 * Note that there currently aren't any ordering requirements for properties so 2689 * we shouldn't validate or assume anything about ordering here. This doesn't 2690 * rule out defining new properties with ordering requirements in the future. 2691 */ 2692 static int read_properties_unlocked(struct drm_i915_private *dev_priv, 2693 u64 __user *uprops, 2694 u32 n_props, 2695 struct perf_open_properties *props) 2696 { 2697 u64 __user *uprop = uprops; 2698 u32 i; 2699 2700 memset(props, 0, sizeof(struct perf_open_properties)); 2701 2702 if (!n_props) { 2703 DRM_DEBUG("No i915 perf properties given\n"); 2704 return -EINVAL; 2705 } 2706 2707 /* Considering that ID = 0 is reserved and assuming that we don't 2708 * (currently) expect any configurations to ever specify duplicate 2709 * values for a particular property ID then the last _PROP_MAX value is 2710 * one greater than the maximum number of properties we expect to get 2711 * from userspace. 
2712 */ 2713 if (n_props >= DRM_I915_PERF_PROP_MAX) { 2714 DRM_DEBUG("More i915 perf properties specified than exist\n"); 2715 return -EINVAL; 2716 } 2717 2718 for (i = 0; i < n_props; i++) { 2719 u64 oa_period, oa_freq_hz; 2720 u64 id, value; 2721 int ret; 2722 2723 ret = get_user(id, uprop); 2724 if (ret) 2725 return ret; 2726 2727 ret = get_user(value, uprop + 1); 2728 if (ret) 2729 return ret; 2730 2731 if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) { 2732 DRM_DEBUG("Unknown i915 perf property ID\n"); 2733 return -EINVAL; 2734 } 2735 2736 switch ((enum drm_i915_perf_property_id)id) { 2737 case DRM_I915_PERF_PROP_CTX_HANDLE: 2738 props->single_context = 1; 2739 props->ctx_handle = value; 2740 break; 2741 case DRM_I915_PERF_PROP_SAMPLE_OA: 2742 if (value) 2743 props->sample_flags |= SAMPLE_OA_REPORT; 2744 break; 2745 case DRM_I915_PERF_PROP_OA_METRICS_SET: 2746 if (value == 0) { 2747 DRM_DEBUG("Unknown OA metric set ID\n"); 2748 return -EINVAL; 2749 } 2750 props->metrics_set = value; 2751 break; 2752 case DRM_I915_PERF_PROP_OA_FORMAT: 2753 if (value == 0 || value >= I915_OA_FORMAT_MAX) { 2754 DRM_DEBUG("Out-of-range OA report format %llu\n", 2755 value); 2756 return -EINVAL; 2757 } 2758 if (!dev_priv->perf.oa.oa_formats[value].size) { 2759 DRM_DEBUG("Unsupported OA report format %llu\n", 2760 value); 2761 return -EINVAL; 2762 } 2763 props->oa_format = value; 2764 break; 2765 case DRM_I915_PERF_PROP_OA_EXPONENT: 2766 if (value > OA_EXPONENT_MAX) { 2767 DRM_DEBUG("OA timer exponent too high (> %u)\n", 2768 OA_EXPONENT_MAX); 2769 return -EINVAL; 2770 } 2771 2772 /* Theoretically we can program the OA unit to sample 2773 * e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns 2774 * for BXT. We don't allow such high sampling 2775 * frequencies by default unless root. 2776 */ 2777 2778 BUILD_BUG_ON(sizeof(oa_period) != 8); 2779 oa_period = oa_exponent_to_ns(dev_priv, value); 2780 2781 /* This check is primarily to ensure that oa_period <= 2782 * UINT32_MAX (before passing to do_div which only 2783 * accepts a u32 denominator), but we can also skip 2784 * checking anything < 1Hz which implicitly can't be 2785 * limited via an integer oa_max_sample_rate. 2786 */ 2787 if (oa_period <= NSEC_PER_SEC) { 2788 u64 tmp = NSEC_PER_SEC; 2789 do_div(tmp, oa_period); 2790 oa_freq_hz = tmp; 2791 } else 2792 oa_freq_hz = 0; 2793 2794 if (oa_freq_hz > i915_oa_max_sample_rate && 2795 !capable(CAP_SYS_ADMIN)) { 2796 DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n", 2797 i915_oa_max_sample_rate); 2798 return -EACCES; 2799 } 2800 2801 props->oa_periodic = true; 2802 props->oa_period_exponent = value; 2803 break; 2804 case DRM_I915_PERF_PROP_MAX: 2805 MISSING_CASE(id); 2806 return -EINVAL; 2807 } 2808 2809 uprop += 2; 2810 } 2811 2812 return 0; 2813 } 2814 2815 /** 2816 * i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD 2817 * @dev: drm device 2818 * @data: ioctl data copied from userspace (unvalidated) 2819 * @file: drm file 2820 * 2821 * Validates the stream open parameters given by userspace including flags 2822 * and an array of u64 key, value pair properties. 2823 * 2824 * Very little is assumed up front about the nature of the stream being 2825 * opened (for instance we don't assume it's for periodic OA unit metrics). An 2826 * i915-perf stream is expected to be a suitable interface for other forms of 2827 * buffered data written by the GPU besides periodic OA metrics. 
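 *
 * As a rough, untested sketch of the expected usage for an OA stream, where
 * metrics_set_id, oa_exponent and drm_fd are assumed to have been chosen by
 * the application (the metric set ID typically read from a config's
 * /sys/class/drm/card0/metrics/<uuid>/id file):
 *
 *   uint64_t properties[] = {
 *           DRM_I915_PERF_PROP_SAMPLE_OA, 1,
 *           DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
 *           DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 *           DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
 *   };
 *   struct drm_i915_perf_open_param param = {
 *           .flags = I915_PERF_FLAG_FD_CLOEXEC,
 *           .num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
 *           .properties_ptr = (uintptr_t)properties,
 *   };
 *   int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);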
2828 * 2829 * Note we copy the properties from userspace outside of the i915 perf 2830 * mutex to avoid an awkward lockdep with mmap_sem. 2831 * 2832 * Most of the implementation details are handled by 2833 * i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock 2834 * mutex for serializing with any non-file-operation driver hooks. 2835 * 2836 * Return: A newly opened i915 Perf stream file descriptor or negative 2837 * error code on failure. 2838 */ 2839 int i915_perf_open_ioctl(struct drm_device *dev, void *data, 2840 struct drm_file *file) 2841 { 2842 struct drm_i915_private *dev_priv = dev->dev_private; 2843 struct drm_i915_perf_open_param *param = data; 2844 struct perf_open_properties props; 2845 u32 known_open_flags; 2846 int ret; 2847 2848 if (!dev_priv->perf.initialized) { 2849 DRM_DEBUG("i915 perf interface not available for this system\n"); 2850 return -ENOTSUPP; 2851 } 2852 2853 known_open_flags = I915_PERF_FLAG_FD_CLOEXEC | 2854 I915_PERF_FLAG_FD_NONBLOCK | 2855 I915_PERF_FLAG_DISABLED; 2856 if (param->flags & ~known_open_flags) { 2857 DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n"); 2858 return -EINVAL; 2859 } 2860 2861 ret = read_properties_unlocked(dev_priv, 2862 u64_to_user_ptr(param->properties_ptr), 2863 param->num_properties, 2864 &props); 2865 if (ret) 2866 return ret; 2867 2868 mutex_lock(&dev_priv->perf.lock); 2869 ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file); 2870 mutex_unlock(&dev_priv->perf.lock); 2871 2872 return ret; 2873 } 2874 2875 /** 2876 * i915_perf_register - exposes i915-perf to userspace 2877 * @dev_priv: i915 device instance 2878 * 2879 * In particular OA metric sets are advertised under a sysfs metrics/ 2880 * directory allowing userspace to enumerate valid IDs that can be 2881 * used to open an i915-perf stream. 2882 */ 2883 void i915_perf_register(struct drm_i915_private *dev_priv) 2884 { 2885 int ret; 2886 2887 if (!dev_priv->perf.initialized) 2888 return; 2889 2890 /* To be sure we're synchronized with an attempted 2891 * i915_perf_open_ioctl(); considering that we register after 2892 * being exposed to userspace. 
2893 */ 2894 mutex_lock(&dev_priv->perf.lock); 2895 2896 dev_priv->perf.metrics_kobj = 2897 kobject_create_and_add("metrics", 2898 &dev_priv->drm.primary->kdev->kobj); 2899 if (!dev_priv->perf.metrics_kobj) 2900 goto exit; 2901 2902 sysfs_attr_init(&dev_priv->perf.oa.test_config.sysfs_metric_id.attr); 2903 2904 if (INTEL_GEN(dev_priv) >= 11) { 2905 i915_perf_load_test_config_icl(dev_priv); 2906 } else if (IS_CANNONLAKE(dev_priv)) { 2907 i915_perf_load_test_config_cnl(dev_priv); 2908 } else if (IS_COFFEELAKE(dev_priv)) { 2909 if (IS_CFL_GT2(dev_priv)) 2910 i915_perf_load_test_config_cflgt2(dev_priv); 2911 if (IS_CFL_GT3(dev_priv)) 2912 i915_perf_load_test_config_cflgt3(dev_priv); 2913 } else if (IS_GEMINILAKE(dev_priv)) { 2914 i915_perf_load_test_config_glk(dev_priv); 2915 } else if (IS_KABYLAKE(dev_priv)) { 2916 if (IS_KBL_GT2(dev_priv)) 2917 i915_perf_load_test_config_kblgt2(dev_priv); 2918 else if (IS_KBL_GT3(dev_priv)) 2919 i915_perf_load_test_config_kblgt3(dev_priv); 2920 } else if (IS_BROXTON(dev_priv)) { 2921 i915_perf_load_test_config_bxt(dev_priv); 2922 } else if (IS_SKYLAKE(dev_priv)) { 2923 if (IS_SKL_GT2(dev_priv)) 2924 i915_perf_load_test_config_sklgt2(dev_priv); 2925 else if (IS_SKL_GT3(dev_priv)) 2926 i915_perf_load_test_config_sklgt3(dev_priv); 2927 else if (IS_SKL_GT4(dev_priv)) 2928 i915_perf_load_test_config_sklgt4(dev_priv); 2929 } else if (IS_CHERRYVIEW(dev_priv)) { 2930 i915_perf_load_test_config_chv(dev_priv); 2931 } else if (IS_BROADWELL(dev_priv)) { 2932 i915_perf_load_test_config_bdw(dev_priv); 2933 } else if (IS_HASWELL(dev_priv)) { 2934 i915_perf_load_test_config_hsw(dev_priv); 2935 } 2936 2937 if (dev_priv->perf.oa.test_config.id == 0) 2938 goto sysfs_error; 2939 2940 ret = sysfs_create_group(dev_priv->perf.metrics_kobj, 2941 &dev_priv->perf.oa.test_config.sysfs_metric); 2942 if (ret) 2943 goto sysfs_error; 2944 2945 atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1); 2946 2947 goto exit; 2948 2949 sysfs_error: 2950 kobject_put(dev_priv->perf.metrics_kobj); 2951 dev_priv->perf.metrics_kobj = NULL; 2952 2953 exit: 2954 mutex_unlock(&dev_priv->perf.lock); 2955 } 2956 2957 /** 2958 * i915_perf_unregister - hide i915-perf from userspace 2959 * @dev_priv: i915 device instance 2960 * 2961 * i915-perf state cleanup is split up into an 'unregister' and 2962 * 'deinit' phase where the interface is first hidden from 2963 * userspace by i915_perf_unregister() before cleaning up 2964 * remaining state in i915_perf_fini(). 
 */
void i915_perf_unregister(struct drm_i915_private *dev_priv)
{
	if (!dev_priv->perf.metrics_kobj)
		return;

	sysfs_remove_group(dev_priv->perf.metrics_kobj,
			   &dev_priv->perf.oa.test_config.sysfs_metric);

	kobject_put(dev_priv->perf.metrics_kobj);
	dev_priv->perf.metrics_kobj = NULL;
}

static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	static const i915_reg_t flex_eu_regs[] = {
		EU_PERF_CNTL0,
		EU_PERF_CNTL1,
		EU_PERF_CNTL2,
		EU_PERF_CNTL3,
		EU_PERF_CNTL4,
		EU_PERF_CNTL5,
		EU_PERF_CNTL6,
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
		if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr)
			return true;
	}
	return false;
}

static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) &&
		addr <= i915_mmio_reg_offset(OASTARTTRIG8)) ||
	       (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) &&
		addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) ||
	       (addr >= i915_mmio_reg_offset(OACEC0_0) &&
		addr <= i915_mmio_reg_offset(OACEC7_1));
}

static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) ||
	       (addr >= i915_mmio_reg_offset(MICRO_BP0_0) &&
		addr <= i915_mmio_reg_offset(NOA_WRITE)) ||
	       (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) &&
		addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) ||
	       (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) &&
		addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI));
}

static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen7_is_valid_mux_addr(dev_priv, addr) ||
	       addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) ||
	       (addr >= i915_mmio_reg_offset(RPM_CONFIG0) &&
		addr <= i915_mmio_reg_offset(NOA_CONFIG(8)));
}

static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen8_is_valid_mux_addr(dev_priv, addr) ||
	       addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) ||
	       (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) &&
		addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI));
}

static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen7_is_valid_mux_addr(dev_priv, addr) ||
	       (addr >= 0x25100 && addr <= 0x2FF90) ||
	       (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) &&
		addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) ||
	       addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0);
}

static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen7_is_valid_mux_addr(dev_priv, addr) ||
	       (addr >= 0x182300 && addr <= 0x1823A4);
}

static u32 mask_reg_value(u32 reg, u32 val)
{
	/* HALF_SLICE_CHICKEN2 is programmed with the
	 * WaDisableSTUnitPowerOptimization workaround. Make sure the value
	 * programmed by userspace doesn't change this.
	 */
	if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg)
		val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);

	/* WAIT_FOR_RC6_EXIT has only one bit fulfilling the function
	 * indicated by its name and a bunch of selection fields used by OA
	 * configs.
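	 *
	 * (Both of the above rely on these being "masked" registers, where
	 * the upper 16 bits select which of the lower 16 bits a write
	 * actually updates - _MASKED_BIT_ENABLE(bit) expands to something
	 * equivalent to ((bit) << 16 | (bit)) - so clearing the enable form
	 * here is enough to stop userspace toggling the protected bit.)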
3062 */ 3063 if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg) 3064 val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE); 3065 3066 return val; 3067 } 3068 3069 static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv, 3070 bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr), 3071 u32 __user *regs, 3072 u32 n_regs) 3073 { 3074 struct i915_oa_reg *oa_regs; 3075 int err; 3076 u32 i; 3077 3078 if (!n_regs) 3079 return NULL; 3080 3081 if (!access_ok(regs, n_regs * sizeof(u32) * 2)) 3082 return ERR_PTR(-EFAULT); 3083 3084 /* No is_valid function means we're not allowing any register to be programmed. */ 3085 GEM_BUG_ON(!is_valid); 3086 if (!is_valid) 3087 return ERR_PTR(-EINVAL); 3088 3089 oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL); 3090 if (!oa_regs) 3091 return ERR_PTR(-ENOMEM); 3092 3093 for (i = 0; i < n_regs; i++) { 3094 u32 addr, value; 3095 3096 err = get_user(addr, regs); 3097 if (err) 3098 goto addr_err; 3099 3100 if (!is_valid(dev_priv, addr)) { 3101 DRM_DEBUG("Invalid oa_reg address: %X\n", addr); 3102 err = -EINVAL; 3103 goto addr_err; 3104 } 3105 3106 err = get_user(value, regs + 1); 3107 if (err) 3108 goto addr_err; 3109 3110 oa_regs[i].addr = _MMIO(addr); 3111 oa_regs[i].value = mask_reg_value(addr, value); 3112 3113 regs += 2; 3114 } 3115 3116 return oa_regs; 3117 3118 addr_err: 3119 kfree(oa_regs); 3120 return ERR_PTR(err); 3121 } 3122 3123 static ssize_t show_dynamic_id(struct device *dev, 3124 struct device_attribute *attr, 3125 char *buf) 3126 { 3127 struct i915_oa_config *oa_config = 3128 container_of(attr, typeof(*oa_config), sysfs_metric_id); 3129 3130 return sprintf(buf, "%d\n", oa_config->id); 3131 } 3132 3133 static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, 3134 struct i915_oa_config *oa_config) 3135 { 3136 sysfs_attr_init(&oa_config->sysfs_metric_id.attr); 3137 oa_config->sysfs_metric_id.attr.name = "id"; 3138 oa_config->sysfs_metric_id.attr.mode = S_IRUGO; 3139 oa_config->sysfs_metric_id.show = show_dynamic_id; 3140 oa_config->sysfs_metric_id.store = NULL; 3141 3142 oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr; 3143 oa_config->attrs[1] = NULL; 3144 3145 oa_config->sysfs_metric.name = oa_config->uuid; 3146 oa_config->sysfs_metric.attrs = oa_config->attrs; 3147 3148 return sysfs_create_group(dev_priv->perf.metrics_kobj, 3149 &oa_config->sysfs_metric); 3150 } 3151 3152 /** 3153 * i915_perf_add_config_ioctl - DRM ioctl() for userspace to add a new OA config 3154 * @dev: drm device 3155 * @data: ioctl data (pointer to struct drm_i915_perf_oa_config) copied from 3156 * userspace (unvalidated) 3157 * @file: drm file 3158 * 3159 * Validates the submitted OA register to be saved into a new OA config that 3160 * can then be used for programming the OA unit and its NOA network. 3161 * 3162 * Returns: A new allocated config number to be used with the perf open ioctl 3163 * or a negative error code on failure. 
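 *
 * A rough, untested sketch of the expected userspace usage, where mux_regs
 * is an application provided array of <mmio address, value> u32 pairs and
 * uuid_str is any valid, unused 36 character uuid string:
 *
 *   struct drm_i915_perf_oa_config config = { 0 };
 *
 *   memcpy(config.uuid, uuid_str, sizeof(config.uuid));
 *   config.n_mux_regs = n_mux_regs;
 *   config.mux_regs_ptr = (uintptr_t)mux_regs;
 *
 *   int config_id = ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);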
3164 */ 3165 int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, 3166 struct drm_file *file) 3167 { 3168 struct drm_i915_private *dev_priv = dev->dev_private; 3169 struct drm_i915_perf_oa_config *args = data; 3170 struct i915_oa_config *oa_config, *tmp; 3171 int err, id; 3172 3173 if (!dev_priv->perf.initialized) { 3174 DRM_DEBUG("i915 perf interface not available for this system\n"); 3175 return -ENOTSUPP; 3176 } 3177 3178 if (!dev_priv->perf.metrics_kobj) { 3179 DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); 3180 return -EINVAL; 3181 } 3182 3183 if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { 3184 DRM_DEBUG("Insufficient privileges to add i915 OA config\n"); 3185 return -EACCES; 3186 } 3187 3188 if ((!args->mux_regs_ptr || !args->n_mux_regs) && 3189 (!args->boolean_regs_ptr || !args->n_boolean_regs) && 3190 (!args->flex_regs_ptr || !args->n_flex_regs)) { 3191 DRM_DEBUG("No OA registers given\n"); 3192 return -EINVAL; 3193 } 3194 3195 oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); 3196 if (!oa_config) { 3197 DRM_DEBUG("Failed to allocate memory for the OA config\n"); 3198 return -ENOMEM; 3199 } 3200 3201 atomic_set(&oa_config->ref_count, 1); 3202 3203 if (!uuid_is_valid(args->uuid)) { 3204 DRM_DEBUG("Invalid uuid format for OA config\n"); 3205 err = -EINVAL; 3206 goto reg_err; 3207 } 3208 3209 /* Last character in oa_config->uuid will be 0 because oa_config is 3210 * kzalloc. 3211 */ 3212 memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid)); 3213 3214 oa_config->mux_regs_len = args->n_mux_regs; 3215 oa_config->mux_regs = 3216 alloc_oa_regs(dev_priv, 3217 dev_priv->perf.oa.ops.is_valid_mux_reg, 3218 u64_to_user_ptr(args->mux_regs_ptr), 3219 args->n_mux_regs); 3220 3221 if (IS_ERR(oa_config->mux_regs)) { 3222 DRM_DEBUG("Failed to create OA config for mux_regs\n"); 3223 err = PTR_ERR(oa_config->mux_regs); 3224 goto reg_err; 3225 } 3226 3227 oa_config->b_counter_regs_len = args->n_boolean_regs; 3228 oa_config->b_counter_regs = 3229 alloc_oa_regs(dev_priv, 3230 dev_priv->perf.oa.ops.is_valid_b_counter_reg, 3231 u64_to_user_ptr(args->boolean_regs_ptr), 3232 args->n_boolean_regs); 3233 3234 if (IS_ERR(oa_config->b_counter_regs)) { 3235 DRM_DEBUG("Failed to create OA config for b_counter_regs\n"); 3236 err = PTR_ERR(oa_config->b_counter_regs); 3237 goto reg_err; 3238 } 3239 3240 if (INTEL_GEN(dev_priv) < 8) { 3241 if (args->n_flex_regs != 0) { 3242 err = -EINVAL; 3243 goto reg_err; 3244 } 3245 } else { 3246 oa_config->flex_regs_len = args->n_flex_regs; 3247 oa_config->flex_regs = 3248 alloc_oa_regs(dev_priv, 3249 dev_priv->perf.oa.ops.is_valid_flex_reg, 3250 u64_to_user_ptr(args->flex_regs_ptr), 3251 args->n_flex_regs); 3252 3253 if (IS_ERR(oa_config->flex_regs)) { 3254 DRM_DEBUG("Failed to create OA config for flex_regs\n"); 3255 err = PTR_ERR(oa_config->flex_regs); 3256 goto reg_err; 3257 } 3258 } 3259 3260 err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); 3261 if (err) 3262 goto reg_err; 3263 3264 /* We shouldn't have too many configs, so this iteration shouldn't be 3265 * too costly. 
	 */
	idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, oa_config->uuid)) {
			DRM_DEBUG("OA config already exists with this uuid\n");
			err = -EADDRINUSE;
			goto sysfs_err;
		}
	}

	err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config);
	if (err) {
		DRM_DEBUG("Failed to create sysfs entry for OA config\n");
		goto sysfs_err;
	}

	/* Config id 0 is invalid, id 1 is reserved for the kernel-stored
	 * test config.
	 */
	oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr,
				  oa_config, 2,
				  0, GFP_KERNEL);
	if (oa_config->id < 0) {
		DRM_DEBUG("Failed to allocate id for OA config\n");
		err = oa_config->id;
		goto sysfs_err;
	}

	mutex_unlock(&dev_priv->perf.metrics_lock);

	DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id);

	return oa_config->id;

sysfs_err:
	mutex_unlock(&dev_priv->perf.metrics_lock);
reg_err:
	put_oa_config(dev_priv, oa_config);
	DRM_DEBUG("Failed to add new OA config\n");
	return err;
}

/**
 * i915_perf_remove_config_ioctl - DRM ioctl() for userspace to remove an OA config
 * @dev: drm device
 * @data: ioctl data (pointer to u64 integer) copied from userspace
 * @file: drm file
 *
 * Configs can be removed while being used; they will stop appearing in sysfs
 * and their content will be freed when the stream using the config is closed.
 *
 * Returns: 0 on success or a negative error code on failure.
 */
int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u64 *arg = data;
	struct i915_oa_config *oa_config;
	int ret;

	if (!dev_priv->perf.initialized) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_DEBUG("Insufficient privileges to remove i915 OA config\n");
		return -EACCES;
	}

	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (ret)
		goto lock_err;

	oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg);
	if (!oa_config) {
		DRM_DEBUG("Failed to remove unknown OA config\n");
		ret = -ENOENT;
		goto config_err;
	}

	GEM_BUG_ON(*arg != oa_config->id);

	sysfs_remove_group(dev_priv->perf.metrics_kobj,
			   &oa_config->sysfs_metric);

	idr_remove(&dev_priv->perf.metrics_idr, *arg);

	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);

	put_oa_config(dev_priv, oa_config);

config_err:
	mutex_unlock(&dev_priv->perf.metrics_lock);
lock_err:
	return ret;
}
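
/*
 * As a rough illustration only (not part of the driver): removing a
 * previously added config from userspace is simply a matter of passing its
 * id back in. Assuming drm_fd and the id returned by
 * DRM_IOCTL_I915_PERF_ADD_CONFIG above:
 *
 *	u64 config_id = id;
 *
 *	drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id);
 */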

static struct ctl_table oa_table[] = {
	{
		.procname = "perf_stream_paranoid",
		.data = &i915_perf_stream_paranoid,
		.maxlen = sizeof(i915_perf_stream_paranoid),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &zero,
		.extra2 = &one,
	},
	{
		.procname = "oa_max_sample_rate",
		.data = &i915_oa_max_sample_rate,
		.maxlen = sizeof(i915_oa_max_sample_rate),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &zero,
		.extra2 = &oa_sample_rate_hard_limit,
	},
	{}
};

static struct ctl_table i915_root[] = {
	{
		.procname = "i915",
		.maxlen = 0,
		.mode = 0555,
		.child = oa_table,
	},
	{}
};

static struct ctl_table dev_root[] = {
	{
		.procname = "dev",
		.maxlen = 0,
		.mode = 0555,
		.child = i915_root,
	},
	{}
};

/**
 * i915_perf_init - initialize i915-perf state on module load
 * @dev_priv: i915 device instance
 *
 * Initializes i915-perf state without exposing anything to userspace.
 *
 * Note: i915-perf initialization is split into an 'init' and 'register'
 * phase, with i915_perf_register() exposing state to userspace.
 */
void i915_perf_init(struct drm_i915_private *dev_priv)
{
	if (IS_HASWELL(dev_priv)) {
		dev_priv->perf.oa.ops.is_valid_b_counter_reg =
			gen7_is_valid_b_counter_addr;
		dev_priv->perf.oa.ops.is_valid_mux_reg =
			hsw_is_valid_mux_addr;
		dev_priv->perf.oa.ops.is_valid_flex_reg = NULL;
		dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
		dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set;
		dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
		dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
		dev_priv->perf.oa.ops.read = gen7_oa_read;
		dev_priv->perf.oa.ops.oa_hw_tail_read =
			gen7_oa_hw_tail_read;

		dev_priv->perf.oa.oa_formats = hsw_oa_formats;
	} else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		/* Note that although we could theoretically also support the
		 * legacy ringbuffer mode on BDW (and earlier iterations of
		 * this driver, before upstreaming, did so), it didn't seem
		 * worth the complexity to maintain now that BDW+ enables
		 * execlist mode by default.
		 */
		dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats;

		dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable;
		dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable;
		dev_priv->perf.oa.ops.read = gen8_oa_read;
		dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;

		if (IS_GEN_RANGE(dev_priv, 8, 9)) {
			dev_priv->perf.oa.ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			dev_priv->perf.oa.ops.is_valid_mux_reg =
				gen8_is_valid_mux_addr;
			dev_priv->perf.oa.ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			if (IS_CHERRYVIEW(dev_priv)) {
				dev_priv->perf.oa.ops.is_valid_mux_reg =
					chv_is_valid_mux_addr;
			}

			dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
			dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set;

			if (IS_GEN(dev_priv, 8)) {
				dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
				dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;

				dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
			} else {
				dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
				dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;

				dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
			}
		} else if (IS_GEN_RANGE(dev_priv, 10, 11)) {
			dev_priv->perf.oa.ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			dev_priv->perf.oa.ops.is_valid_mux_reg =
				gen10_is_valid_mux_addr;
			dev_priv->perf.oa.ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
			dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set;

			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
			dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;

			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
		}
	}

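	/*
	 * Only set up the common i915-perf state (and so only advertise the
	 * interface by setting perf.initialized) if one of the platform
	 * branches above installed an enable_metric_set hook; on any other
	 * platform the OA unit isn't supported by this driver.
	 */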
	if (dev_priv->perf.oa.ops.enable_metric_set) {
		hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb;
		init_waitqueue_head(&dev_priv->perf.oa.poll_wq);

		INIT_LIST_HEAD(&dev_priv->perf.streams);
		mutex_init(&dev_priv->perf.lock);
		spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);

		oa_sample_rate_hard_limit = 1000 *
			(RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
		dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);

		mutex_init(&dev_priv->perf.metrics_lock);
		idr_init(&dev_priv->perf.metrics_idr);

		dev_priv->perf.initialized = true;
	}
}

static int destroy_config(int id, void *p, void *data)
{
	struct drm_i915_private *dev_priv = data;
	struct i915_oa_config *oa_config = p;

	put_oa_config(dev_priv, oa_config);

	return 0;
}

/**
 * i915_perf_fini - Counterpart to i915_perf_init()
 * @dev_priv: i915 device instance
 */
void i915_perf_fini(struct drm_i915_private *dev_priv)
{
	if (!dev_priv->perf.initialized)
		return;

	idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv);
	idr_destroy(&dev_priv->perf.metrics_idr);

	unregister_sysctl_table(dev_priv->perf.sysctl_header);

	memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));

	dev_priv->perf.initialized = false;
}