/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ucode.h"
#include "gfx_v8_0.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "oss/oss_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
#include "gmc/gmc_8_1_d.h"
#include "vi_structs.h"
#include "vid.h"

struct cik_sdma_rlc_registers;

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
		uint32_t sh_mem_config,
		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
		uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
		unsigned int vmid);
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
		uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
		uint32_t queue_id, uint32_t __user *wptr);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
		uint32_t pipe_id, uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
		unsigned int utimeout, uint32_t pipe_id,
		uint32_t queue_id);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
		unsigned int utimeout);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
		unsigned int watch_point_id,
		uint32_t cntl_val,
		uint32_t addr_hi,
		uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
		uint32_t gfx_index_val,
		uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
		unsigned int watch_point_id,
		unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
		uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
		uint8_t vmid);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
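/*
 * amdkfd drives GFXv8 compute through the kfd2kgd_calls table below;
 * amdgpu_amdkfd_gfx_8_0_get_functions() hands it to amdkfd so that all
 * direct register access stays inside amdgpu.
 */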
static const struct kfd2kgd_calls kfd2kgd = {
	.init_gtt_mem_allocation = alloc_gtt_mem,
	.free_gtt_mem = free_gtt_mem,
	.get_vmem_size = get_vmem_size,
	.get_gpu_clock_counter = get_gpu_clock_counter,
	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_pipeline = kgd_init_pipeline,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_address_watch_disable,
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid =
			get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid =
			get_atc_vmid_pasid_mapping_valid,
	.write_vmid_invalidate_request = write_vmid_invalidate_request,
	.get_fw_version = get_fw_version
};

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
{
	return (struct kfd2kgd_calls *)&kfd2kgd;
}

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

	mutex_lock(&adev->srbm_mutex);
	WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	WREG32(mmSRBM_GFX_CNTL, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
			uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/* pipe_id must not be modified here; ME 0 is graphics, hence +1 */
	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}
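/*
 * Worked example of the flat pipe decode used by acquire_queue() and
 * kgd_init_interrupts(), assuming num_pipe_per_mec == 4 (the usual VI
 * configuration): pipe_id 0..3 selects MEC1 pipes 0..3 and pipe_id 4..7
 * selects MEC2 pipes 0..3, since mec = pipe_id / 4 + 1 and
 * pipe = pipe_id % 4. The "+ 1" skips ME 0, the graphics micro-engine,
 * which never hosts KFD queues.
 */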
static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32(mmSH_MEM_CONFIG, sh_mem_config);
	WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
	WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
	WREG32(mmSH_MEM_BASES, sh_mem_bases);

	unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

	while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
		cpu_relax();
	WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

	/* Mapping vmid to pasid also for IH block */
	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

	return 0;
}

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
	/* amdgpu owns the per-pipe state */
	return 0;
}

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}

static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
{
	return 0;
}

static inline struct vi_mqd *get_mqd(void *mqd)
{
	return (struct vi_mqd *)mqd;
}

static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
	return (struct cik_sdma_rlc_registers *)mqd;
}

static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr)
{
	struct vi_mqd *m;
	uint32_t shadow_wptr, valid_wptr;
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	m = get_mqd(mqd);

	/* copy_from_user() returns the number of bytes it could NOT copy,
	 * so 0 means the user-space write pointer was read successfully.
	 */
	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
	if (valid_wptr == 0)
		m->cp_hqd_pq_wptr = shadow_wptr;

	acquire_queue(kgd, pipe_id, queue_id);
	gfx_v8_0_mqd_commit(adev, mqd);
	release_queue(kgd);

	return 0;
}

static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
{
	return 0;
}

static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(mmCP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(mmCP_HQD_PQ_BASE) &&
				high == RREG32(mmCP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_base_addr;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}
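/*
 * Queue teardown, sketched: kgd_hqd_destroy() below writes the requested
 * reset type to CP_HQD_DEQUEUE_REQUEST, then polls CP_HQD_ACTIVE until
 * the CP clears the ACTIVE bit. utimeout is in milliseconds; if it
 * expires, the firmware failed to preempt the queue and -ETIME is
 * returned. kgd_hqd_sdma_destroy() follows the same pattern for SDMA
 * RLC queues: disable the ring buffer, wait for idle, then zero the
 * doorbell and ring registers.
 */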
static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t temp;
	int timeout = utimeout;

	acquire_queue(kgd, pipe_id, queue_id);

	WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);

	/* The dequeue request is complete once the CP has cleared the
	 * ACTIVE bit, so wait for the queue to go inactive.
	 */
	while (true) {
		temp = RREG32(mmCP_HQD_ACTIVE);
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (timeout <= 0) {
			pr_err("kfd: cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		msleep(20);
		timeout -= 20;
	}

	release_queue(kgd);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_base_addr;
	uint32_t temp;
	int timeout = utimeout;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);

	/* Test the context status against the IDLE mask; a __SHIFT value
	 * would check the wrong bit.
	 */
	while (true) {
		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (timeout <= 0)
			return -ETIME;
		msleep(20);
		timeout -= 20;
	}

	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);

	return 0;
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
							uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
							uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	/* Return the PASID field, not the VALID bit */
	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}

static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
}

static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
	return 0;
}

static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	return 0;
}

static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(mmSQ_CMD, sq_cmd);

	/* Restore broadcast mode so later GRBM accesses hit all SEs/SHs */
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SH_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32(mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return 0;
}
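/*
 * Every GFXv8 microcode image begins with the same common header, so
 * get_fw_version() below can pull ucode_version out of
 * union amdgpu_firmware_header without caring which engine-specific
 * header variant follows it.
 */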
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	const union amdgpu_firmware_header *hdr;

	BUG_ON(kgd == NULL);

	switch (type) {
	case KGD_ENGINE_PFP:
		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.pfp_fw->data;
		break;

	case KGD_ENGINE_ME:
		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.me_fw->data;
		break;

	case KGD_ENGINE_CE:
		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.ce_fw->data;
		break;

	case KGD_ENGINE_MEC1:
		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.mec_fw->data;
		break;

	case KGD_ENGINE_MEC2:
		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.mec2_fw->data;
		break;

	case KGD_ENGINE_RLC:
		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.rlc_fw->data;
		break;

	case KGD_ENGINE_SDMA1:
		hdr = (const union amdgpu_firmware_header *)
						adev->sdma.instance[0].fw->data;
		break;

	case KGD_ENGINE_SDMA2:
		hdr = (const union amdgpu_firmware_header *)
						adev->sdma.instance[1].fw->data;
		break;

	default:
		return 0;
	}

	if (hdr == NULL)
		return 0;

	/* Only 12 bits in use */
	return hdr->common.ucode_version;
}