/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
struct mm_struct;

#include "kfd_priv.h"

/*
 * Initial size for the array of queues.
 * The allocated size is doubled each time
 * it is exceeded up to MAX_PROCESS_QUEUES.
 */
#define INITIAL_QUEUE_ARRAY_SIZE 16

/*
 * List of struct kfd_process (field kfd_processes).
 * Unique/indexed by mm_struct*.
 */
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_STATIC_SRCU(kfd_processes_srcu);

static struct workqueue_struct *kfd_process_wq;

struct kfd_process_release_work {
	struct work_struct kfd_work;
	struct kfd_process *p;
};

static struct kfd_process *find_process(const struct task_struct *thread);
static struct kfd_process *create_process(const struct task_struct *thread);

void kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = create_workqueue("kfd_process_wq");
}

void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		flush_workqueue(kfd_process_wq);
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
}

struct kfd_process *kfd_create_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	BUG_ON(!kfd_process_wq);

	if (thread->mm == NULL)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/* Take mmap_sem because we call __mmu_notifier_register inside */
	down_write(&thread->mm->mmap_sem);

	/*
	 * Take the kfd_processes mutex before starting process creation
	 * so that two threads of the same process cannot create two
	 * kfd_process structures.
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process)
		pr_debug("kfd: process already found\n");

	if (!process)
		process = create_process(thread);

	mutex_unlock(&kfd_processes_mutex);

	up_write(&thread->mm->mmap_sem);

	return process;
}

struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (thread->mm == NULL)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);

	return process;
}

static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
					kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process_release_work *my_work;
	struct kfd_process_device *pdd, *temp;
	struct kfd_process *p;

	my_work = (struct kfd_process_release_work *) work;

	p = my_work->p;

	mutex_lock(&p->mutex);

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
							per_device_list) {
		amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
		list_del(&pdd->per_device_list);

		kfree(pdd);
	}

	kfd_pasid_free(p->pasid);

	mutex_unlock(&p->mutex);

	mutex_destroy(&p->mutex);

	kfree(p->queues);

	kfree(p);

	kfree((void *)work);
}

static void kfd_process_destroy_delayed(struct rcu_head *rcu)
{
	struct kfd_process_release_work *work;
	struct kfd_process *p;

	BUG_ON(!kfd_process_wq);

	p = container_of(rcu, struct kfd_process, rcu);
	BUG_ON(atomic_read(&p->mm->mm_count) <= 0);

	mmdrop(p->mm);

	work = (struct kfd_process_release_work *)
		kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);

	if (work) {
		INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
		work->p = p;
		queue_work(kfd_process_wq, (struct work_struct *) work);
	}
}

static void kfd_process_notifier_release(struct mmu_notifier *mn,
					struct mm_struct *mm)
{
	struct kfd_process *p;

	/*
	 * The kfd_process structure cannot be freed because the
	 * mmu_notifier srcu is read locked.
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	BUG_ON(p->mm != mm);

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	mutex_lock(&p->mutex);

	/* In case our notifier is called before the IOMMU notifier */
	pqm_uninit(&p->pqm);

	mutex_unlock(&p->mutex);

	/*
	 * Because we drop mm_count inside kfd_process_destroy_delayed
	 * and because the mmu_notifier_unregister function also drops
	 * mm_count, we need to take an extra count here.
	 */
	atomic_inc(&p->mm->mm_count);
	mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
};

static struct kfd_process *create_process(const struct task_struct *thread)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);

	if (!process)
		goto err_alloc_process;

	process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE,
					sizeof(process->queues[0]), GFP_KERNEL);
	if (!process->queues)
		goto err_alloc_queues;

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	mutex_init(&process->mutex);

	process->mm = thread->mm;

	/* register notifier */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = __mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_mmu_notifier;

	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
			(uintptr_t)process->mm);

	process->lead_thread = thread->group_leader;

	process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE;

	INIT_LIST_HEAD(&process->per_device_data);

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	return process;

err_process_pqm_init:
	hash_del_rcu(&process->kfd_processes);
	synchronize_rcu();
	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
err_mmu_notifier:
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	kfree(process->queues);
err_alloc_queues:
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}

struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p,
							int create_pdd)
{
	struct kfd_process_device *pdd = NULL;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	if (create_pdd) {
		pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
		if (pdd != NULL) {
			pdd->dev = dev;
			INIT_LIST_HEAD(&pdd->qpd.queues_list);
			INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
			pdd->qpd.dqm = dev->dqm;
			list_add(&pdd->per_device_list, &p->per_device_data);
		}
	}

	return pdd;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p, 1);
	int err;

	if (pdd == NULL)
		return ERR_PTR(-ENOMEM);

	if (pdd->bound)
		return pdd;

	err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
	if (err < 0)
		return ERR_PTR(err);

	pdd->bound = true;

	return pdd;
}

void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd;
	int idx, i;

	BUG_ON(dev == NULL);

	idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, i, p, kfd_processes)
		if (p->pasid == pasid)
			break;

	srcu_read_unlock(&kfd_processes_srcu, idx);

	BUG_ON(p->pasid != pasid);

	mutex_lock(&p->mutex);

	pqm_uninit(&p->pqm);

	pdd = kfd_get_process_device_data(dev, p, 0);

	/*
	 * Just mark pdd as unbound, because we still need it to call
	 * amd_iommu_unbind_pasid() when the process exits.
	 * We don't call amd_iommu_unbind_pasid() here
	 * because the IOMMU called us.
	 */
	if (pdd)
		pdd->bound = false;

	mutex_unlock(&p->mutex);
}

struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}