1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2019 Intel Corporation 5 */ 6 7 #include <linux/kref.h> 8 9 #include "gem/i915_gem_pm.h" 10 #include "gt/intel_gt.h" 11 12 #include "i915_selftest.h" 13 14 #include "igt_flush_test.h" 15 #include "lib_sw_fence.h" 16 17 static struct i915_perf_stream * 18 test_stream(struct i915_perf *perf) 19 { 20 struct drm_i915_perf_open_param param = {}; 21 struct perf_open_properties props = { 22 .engine = intel_engine_lookup_user(perf->i915, 23 I915_ENGINE_CLASS_RENDER, 24 0), 25 .sample_flags = SAMPLE_OA_REPORT, 26 .oa_format = IS_GEN(perf->i915, 12) ? 27 I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8, 28 .metrics_set = 1, 29 }; 30 struct i915_perf_stream *stream; 31 32 stream = kzalloc(sizeof(*stream), GFP_KERNEL); 33 if (!stream) 34 return NULL; 35 36 stream->perf = perf; 37 38 mutex_lock(&perf->lock); 39 if (i915_oa_stream_init(stream, ¶m, &props)) { 40 kfree(stream); 41 stream = NULL; 42 } 43 mutex_unlock(&perf->lock); 44 45 return stream; 46 } 47 48 static void stream_destroy(struct i915_perf_stream *stream) 49 { 50 struct i915_perf *perf = stream->perf; 51 52 mutex_lock(&perf->lock); 53 i915_perf_destroy_locked(stream); 54 mutex_unlock(&perf->lock); 55 } 56 57 static int live_sanitycheck(void *arg) 58 { 59 struct drm_i915_private *i915 = arg; 60 struct i915_perf_stream *stream; 61 62 /* Quick check we can create a perf stream */ 63 64 stream = test_stream(&i915->perf); 65 if (!stream) 66 return -EINVAL; 67 68 stream_destroy(stream); 69 return 0; 70 } 71 72 static int write_timestamp(struct i915_request *rq, int slot) 73 { 74 u32 *cs; 75 int len; 76 77 cs = intel_ring_begin(rq, 6); 78 if (IS_ERR(cs)) 79 return PTR_ERR(cs); 80 81 len = 5; 82 if (INTEL_GEN(rq->i915) >= 8) 83 len++; 84 85 *cs++ = GFX_OP_PIPE_CONTROL(len); 86 *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | 87 PIPE_CONTROL_STORE_DATA_INDEX | 88 PIPE_CONTROL_WRITE_TIMESTAMP; 89 *cs++ = slot * sizeof(u32); 90 *cs++ = 0; 91 *cs++ = 0; 92 *cs++ = 0; 93 94 intel_ring_advance(rq, cs); 95 96 return 0; 97 } 98 99 static ktime_t poll_status(struct i915_request *rq, int slot) 100 { 101 while (!intel_read_status_page(rq->engine, slot) && 102 !i915_request_completed(rq)) 103 cpu_relax(); 104 105 return ktime_get(); 106 } 107 108 static int live_noa_delay(void *arg) 109 { 110 struct drm_i915_private *i915 = arg; 111 struct i915_perf_stream *stream; 112 struct i915_request *rq; 113 ktime_t t0, t1; 114 u64 expected; 115 u32 delay; 116 int err; 117 int i; 118 119 /* Check that the GPU delays matches expectations */ 120 121 stream = test_stream(&i915->perf); 122 if (!stream) 123 return -ENOMEM; 124 125 expected = atomic64_read(&stream->perf->noa_programming_delay); 126 127 if (stream->engine->class != RENDER_CLASS) { 128 err = -ENODEV; 129 goto out; 130 } 131 132 for (i = 0; i < 4; i++) 133 intel_write_status_page(stream->engine, 0x100 + i, 0); 134 135 rq = i915_request_create(stream->engine->kernel_context); 136 if (IS_ERR(rq)) { 137 err = PTR_ERR(rq); 138 goto out; 139 } 140 141 if (rq->engine->emit_init_breadcrumb && 142 i915_request_timeline(rq)->has_initial_breadcrumb) { 143 err = rq->engine->emit_init_breadcrumb(rq); 144 if (err) { 145 i915_request_add(rq); 146 goto out; 147 } 148 } 149 150 err = write_timestamp(rq, 0x100); 151 if (err) { 152 i915_request_add(rq); 153 goto out; 154 } 155 156 err = rq->engine->emit_bb_start(rq, 157 i915_ggtt_offset(stream->noa_wait), 0, 158 I915_DISPATCH_SECURE); 159 if (err) { 160 i915_request_add(rq); 161 goto out; 162 } 163 164 err = write_timestamp(rq, 0x102); 165 if (err) { 166 i915_request_add(rq); 167 goto out; 168 } 169 170 i915_request_get(rq); 171 i915_request_add(rq); 172 173 preempt_disable(); 174 t0 = poll_status(rq, 0x100); 175 t1 = poll_status(rq, 0x102); 176 preempt_enable(); 177 178 pr_info("CPU delay: %lluns, expected %lluns\n", 179 ktime_sub(t1, t0), expected); 180 181 delay = intel_read_status_page(stream->engine, 0x102); 182 delay -= intel_read_status_page(stream->engine, 0x100); 183 delay = div_u64(mul_u32_u32(delay, 1000 * 1000), 184 RUNTIME_INFO(i915)->cs_timestamp_frequency_khz); 185 pr_info("GPU delay: %uns, expected %lluns\n", 186 delay, expected); 187 188 if (4 * delay < 3 * expected || 2 * delay > 3 * expected) { 189 pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n", 190 delay / 1000, 191 div_u64(3 * expected, 4000), 192 div_u64(3 * expected, 2000)); 193 err = -EINVAL; 194 } 195 196 i915_request_put(rq); 197 out: 198 stream_destroy(stream); 199 return err; 200 } 201 202 int i915_perf_live_selftests(struct drm_i915_private *i915) 203 { 204 static const struct i915_subtest tests[] = { 205 SUBTEST(live_sanitycheck), 206 SUBTEST(live_noa_delay), 207 }; 208 struct i915_perf *perf = &i915->perf; 209 210 if (!perf->metrics_kobj || !perf->ops.enable_metric_set) 211 return 0; 212 213 if (intel_gt_is_wedged(&i915->gt)) 214 return 0; 215 216 return i915_subtests(tests, i915); 217 } 218