1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * VAS Fault handling. 4 * Copyright 2019, IBM Corporation 5 */ 6 7 #define pr_fmt(fmt) "vas: " fmt 8 9 #include <linux/kernel.h> 10 #include <linux/types.h> 11 #include <linux/slab.h> 12 #include <linux/uaccess.h> 13 #include <linux/kthread.h> 14 #include <linux/sched/signal.h> 15 #include <linux/mmu_context.h> 16 #include <asm/icswx.h> 17 18 #include "vas.h" 19 20 /* 21 * The maximum FIFO size for fault window can be 8MB 22 * (VAS_RX_FIFO_SIZE_MAX). Using 4MB FIFO since each VAS 23 * instance will be having fault window. 24 * 8MB FIFO can be used if expects more faults for each VAS 25 * instance. 26 */ 27 #define VAS_FAULT_WIN_FIFO_SIZE (4 << 20) 28 29 /* 30 * Update the CSB to indicate a translation error. 31 * 32 * User space will be polling on CSB after the request is issued. 33 * If NX can handle the request without any issues, it updates CSB. 34 * Whereas if NX encounters page fault, the kernel will handle the 35 * fault and update CSB with translation error. 36 * 37 * If we are unable to update the CSB means copy_to_user failed due to 38 * invalid csb_addr, send a signal to the process. 39 */ 40 static void update_csb(struct vas_window *window, 41 struct coprocessor_request_block *crb) 42 { 43 struct coprocessor_status_block csb; 44 struct kernel_siginfo info; 45 struct task_struct *tsk; 46 void __user *csb_addr; 47 struct pid *pid; 48 int rc; 49 50 /* 51 * NX user space windows can not be opened for task->mm=NULL 52 * and faults will not be generated for kernel requests. 53 */ 54 if (WARN_ON_ONCE(!window->mm || !window->user_win)) 55 return; 56 57 csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); 58 59 memset(&csb, 0, sizeof(csb)); 60 csb.cc = CSB_CC_TRANSLATION; 61 csb.ce = CSB_CE_TERMINATION; 62 csb.cs = 0; 63 csb.count = 0; 64 65 /* 66 * NX operates and returns in BE format as defined CRB struct. 67 * So saves fault_storage_addr in BE as NX pastes in FIFO and 68 * expects user space to convert to CPU format. 69 */ 70 csb.address = crb->stamp.nx.fault_storage_addr; 71 csb.flags = 0; 72 73 pid = window->pid; 74 tsk = get_pid_task(pid, PIDTYPE_PID); 75 /* 76 * Process closes send window after all pending NX requests are 77 * completed. In multi-thread applications, a child thread can 78 * open a window and can exit without closing it. May be some 79 * requests are pending or this window can be used by other 80 * threads later. We should handle faults if NX encounters 81 * pages faults on these requests. Update CSB with translation 82 * error and fault address. If csb_addr passed by user space is 83 * invalid, send SEGV signal to pid saved in window. If the 84 * child thread is not running, send the signal to tgid. 85 * Parent thread (tgid) will close this window upon its exit. 86 * 87 * pid and mm references are taken when window is opened by 88 * process (pid). So tgid is used only when child thread opens 89 * a window and exits without closing it. 90 */ 91 if (!tsk) { 92 pid = window->tgid; 93 tsk = get_pid_task(pid, PIDTYPE_PID); 94 /* 95 * Parent thread (tgid) will be closing window when it 96 * exits. So should not get here. 97 */ 98 if (WARN_ON_ONCE(!tsk)) 99 return; 100 } 101 102 /* Return if the task is exiting. */ 103 if (tsk->flags & PF_EXITING) { 104 put_task_struct(tsk); 105 return; 106 } 107 108 use_mm(window->mm); 109 rc = copy_to_user(csb_addr, &csb, sizeof(csb)); 110 /* 111 * User space polls on csb.flags (first byte). So add barrier 112 * then copy first byte with csb flags update. 113 */ 114 if (!rc) { 115 csb.flags = CSB_V; 116 /* Make sure update to csb.flags is visible now */ 117 smp_mb(); 118 rc = copy_to_user(csb_addr, &csb, sizeof(u8)); 119 } 120 unuse_mm(window->mm); 121 put_task_struct(tsk); 122 123 /* Success */ 124 if (!rc) 125 return; 126 127 pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", 128 csb_addr, pid_vnr(pid)); 129 130 clear_siginfo(&info); 131 info.si_signo = SIGSEGV; 132 info.si_errno = EFAULT; 133 info.si_code = SEGV_MAPERR; 134 info.si_addr = csb_addr; 135 136 /* 137 * process will be polling on csb.flags after request is sent to 138 * NX. So generally CSB update should not fail except when an 139 * application passes invalid csb_addr. So an error message will 140 * be displayed and leave it to user space whether to ignore or 141 * handle this signal. 142 */ 143 rcu_read_lock(); 144 rc = kill_pid_info(SIGSEGV, &info, pid); 145 rcu_read_unlock(); 146 147 pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, 148 pid_vnr(pid), rc); 149 } 150 151 /* 152 * Process valid CRBs in fault FIFO. 153 * NX process user space requests, return credit and update the status 154 * in CRB. If it encounters transalation error when accessing CRB or 155 * request buffers, raises interrupt on the CPU to handle the fault. 156 * It takes credit on fault window, updates nx_fault_stamp in CRB with 157 * the following information and pastes CRB in fault FIFO. 158 * 159 * pswid - window ID of the window on which the request is sent. 160 * fault_storage_addr - fault address 161 * 162 * It can raise a single interrupt for multiple faults. Expects OS to 163 * process all valid faults and return credit for each fault on user 164 * space and fault windows. This fault FIFO control will be done with 165 * credit mechanism. NX can continuously paste CRBs until credits are not 166 * available on fault window. Otherwise, returns with RMA_reject. 167 * 168 * Total credits available on fault window: FIFO_SIZE(4MB)/CRBS_SIZE(128) 169 * 170 */ 171 irqreturn_t vas_fault_thread_fn(int irq, void *data) 172 { 173 struct vas_instance *vinst = data; 174 struct coprocessor_request_block *crb, *entry; 175 struct coprocessor_request_block buf; 176 struct vas_window *window; 177 unsigned long flags; 178 void *fifo; 179 180 crb = &buf; 181 182 /* 183 * VAS can interrupt with multiple page faults. So process all 184 * valid CRBs within fault FIFO until reaches invalid CRB. 185 * We use CCW[0] and pswid to validate validate CRBs: 186 * 187 * CCW[0] Reserved bit. When NX pastes CRB, CCW[0]=0 188 * OS sets this bit to 1 after reading CRB. 189 * pswid NX assigns window ID. Set pswid to -1 after 190 * reading CRB from fault FIFO. 191 * 192 * We exit this function if no valid CRBs are available to process. 193 * So acquire fault_lock and reset fifo_in_progress to 0 before 194 * exit. 195 * In case kernel receives another interrupt with different page 196 * fault, interrupt handler returns with IRQ_HANDLED if 197 * fifo_in_progress is set. Means these new faults will be 198 * handled by the current thread. Otherwise set fifo_in_progress 199 * and return IRQ_WAKE_THREAD to wake up thread. 200 */ 201 while (true) { 202 spin_lock_irqsave(&vinst->fault_lock, flags); 203 /* 204 * Advance the fault fifo pointer to next CRB. 205 * Use CRB_SIZE rather than sizeof(*crb) since the latter is 206 * aligned to CRB_ALIGN (256) but the CRB written to by VAS is 207 * only CRB_SIZE in len. 208 */ 209 fifo = vinst->fault_fifo + (vinst->fault_crbs * CRB_SIZE); 210 entry = fifo; 211 212 if ((entry->stamp.nx.pswid == cpu_to_be32(FIFO_INVALID_ENTRY)) 213 || (entry->ccw & cpu_to_be32(CCW0_INVALID))) { 214 vinst->fifo_in_progress = 0; 215 spin_unlock_irqrestore(&vinst->fault_lock, flags); 216 return IRQ_HANDLED; 217 } 218 219 spin_unlock_irqrestore(&vinst->fault_lock, flags); 220 vinst->fault_crbs++; 221 if (vinst->fault_crbs == (vinst->fault_fifo_size / CRB_SIZE)) 222 vinst->fault_crbs = 0; 223 224 memcpy(crb, fifo, CRB_SIZE); 225 entry->stamp.nx.pswid = cpu_to_be32(FIFO_INVALID_ENTRY); 226 entry->ccw |= cpu_to_be32(CCW0_INVALID); 227 /* 228 * Return credit for the fault window. 229 */ 230 vas_return_credit(vinst->fault_win, false); 231 232 pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d\n", 233 vinst->vas_id, vinst->fault_fifo, fifo, 234 vinst->fault_crbs); 235 236 window = vas_pswid_to_window(vinst, 237 be32_to_cpu(crb->stamp.nx.pswid)); 238 239 if (IS_ERR(window)) { 240 /* 241 * We got an interrupt about a specific send 242 * window but we can't find that window and we can't 243 * even clean it up (return credit on user space 244 * window). 245 * But we should not get here. 246 * TODO: Disable IRQ. 247 */ 248 pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, fault_crbs %d bad CRB?\n", 249 vinst->vas_id, vinst->fault_fifo, fifo, 250 be32_to_cpu(crb->stamp.nx.pswid), 251 vinst->fault_crbs); 252 253 WARN_ON_ONCE(1); 254 } else { 255 update_csb(window, crb); 256 /* 257 * Return credit for send window after processing 258 * fault CRB. 259 */ 260 vas_return_credit(window, true); 261 } 262 } 263 } 264 265 irqreturn_t vas_fault_handler(int irq, void *dev_id) 266 { 267 struct vas_instance *vinst = dev_id; 268 irqreturn_t ret = IRQ_WAKE_THREAD; 269 unsigned long flags; 270 271 /* 272 * NX can generate an interrupt for multiple faults. So the 273 * fault handler thread process all CRBs until finds invalid 274 * entry. In case if NX sees continuous faults, it is possible 275 * that the thread function entered with the first interrupt 276 * can execute and process all valid CRBs. 277 * So wake up thread only if the fault thread is not in progress. 278 */ 279 spin_lock_irqsave(&vinst->fault_lock, flags); 280 281 if (vinst->fifo_in_progress) 282 ret = IRQ_HANDLED; 283 else 284 vinst->fifo_in_progress = 1; 285 286 spin_unlock_irqrestore(&vinst->fault_lock, flags); 287 288 return ret; 289 } 290 291 /* 292 * Fault window is opened per VAS instance. NX pastes fault CRB in fault 293 * FIFO upon page faults. 294 */ 295 int vas_setup_fault_window(struct vas_instance *vinst) 296 { 297 struct vas_rx_win_attr attr; 298 299 vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE; 300 vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL); 301 if (!vinst->fault_fifo) { 302 pr_err("Unable to alloc %d bytes for fault_fifo\n", 303 vinst->fault_fifo_size); 304 return -ENOMEM; 305 } 306 307 /* 308 * Invalidate all CRB entries. NX pastes valid entry for each fault. 309 */ 310 memset(vinst->fault_fifo, FIFO_INVALID_ENTRY, vinst->fault_fifo_size); 311 vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT); 312 313 attr.rx_fifo_size = vinst->fault_fifo_size; 314 attr.rx_fifo = vinst->fault_fifo; 315 316 /* 317 * Max creds is based on number of CRBs can fit in the FIFO. 318 * (fault_fifo_size/CRB_SIZE). If 8MB FIFO is used, max creds 319 * will be 0xffff since the receive creds field is 16bits wide. 320 */ 321 attr.wcreds_max = vinst->fault_fifo_size / CRB_SIZE; 322 attr.lnotify_lpid = 0; 323 attr.lnotify_pid = mfspr(SPRN_PID); 324 attr.lnotify_tid = mfspr(SPRN_PID); 325 326 vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, 327 &attr); 328 329 if (IS_ERR(vinst->fault_win)) { 330 pr_err("VAS: Error %ld opening FaultWin\n", 331 PTR_ERR(vinst->fault_win)); 332 kfree(vinst->fault_fifo); 333 return PTR_ERR(vinst->fault_win); 334 } 335 336 pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n", 337 vinst->fault_win->winid, attr.lnotify_lpid, 338 attr.lnotify_pid, attr.lnotify_tid); 339 340 return 0; 341 } 342