1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * VAS Fault handling. 4 * Copyright 2019, IBM Corporation 5 */ 6 7 #define pr_fmt(fmt) "vas: " fmt 8 9 #include <linux/kernel.h> 10 #include <linux/types.h> 11 #include <linux/slab.h> 12 #include <linux/uaccess.h> 13 #include <linux/kthread.h> 14 #include <linux/sched/signal.h> 15 #include <linux/mmu_context.h> 16 #include <asm/icswx.h> 17 18 #include "vas.h" 19 20 /* 21 * The maximum FIFO size for fault window can be 8MB 22 * (VAS_RX_FIFO_SIZE_MAX). Using 4MB FIFO since each VAS 23 * instance will be having fault window. 24 * 8MB FIFO can be used if expects more faults for each VAS 25 * instance. 26 */ 27 #define VAS_FAULT_WIN_FIFO_SIZE (4 << 20) 28 29 /* 30 * Update the CSB to indicate a translation error. 31 * 32 * User space will be polling on CSB after the request is issued. 33 * If NX can handle the request without any issues, it updates CSB. 34 * Whereas if NX encounters page fault, the kernel will handle the 35 * fault and update CSB with translation error. 36 * 37 * If we are unable to update the CSB means copy_to_user failed due to 38 * invalid csb_addr, send a signal to the process. 39 */ 40 static void update_csb(struct vas_window *window, 41 struct coprocessor_request_block *crb) 42 { 43 struct coprocessor_status_block csb; 44 struct kernel_siginfo info; 45 struct task_struct *tsk; 46 void __user *csb_addr; 47 struct pid *pid; 48 int rc; 49 50 /* 51 * NX user space windows can not be opened for task->mm=NULL 52 * and faults will not be generated for kernel requests. 53 */ 54 if (WARN_ON_ONCE(!window->mm || !window->user_win)) 55 return; 56 57 csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); 58 59 memset(&csb, 0, sizeof(csb)); 60 csb.cc = CSB_CC_TRANSLATION; 61 csb.ce = CSB_CE_TERMINATION; 62 csb.cs = 0; 63 csb.count = 0; 64 65 /* 66 * NX operates and returns in BE format as defined CRB struct. 67 * So saves fault_storage_addr in BE as NX pastes in FIFO and 68 * expects user space to convert to CPU format. 69 */ 70 csb.address = crb->stamp.nx.fault_storage_addr; 71 csb.flags = 0; 72 73 pid = window->pid; 74 tsk = get_pid_task(pid, PIDTYPE_PID); 75 /* 76 * Process closes send window after all pending NX requests are 77 * completed. In multi-thread applications, a child thread can 78 * open a window and can exit without closing it. May be some 79 * requests are pending or this window can be used by other 80 * threads later. We should handle faults if NX encounters 81 * pages faults on these requests. Update CSB with translation 82 * error and fault address. If csb_addr passed by user space is 83 * invalid, send SEGV signal to pid saved in window. If the 84 * child thread is not running, send the signal to tgid. 85 * Parent thread (tgid) will close this window upon its exit. 86 * 87 * pid and mm references are taken when window is opened by 88 * process (pid). So tgid is used only when child thread opens 89 * a window and exits without closing it. 90 */ 91 if (!tsk) { 92 pid = window->tgid; 93 tsk = get_pid_task(pid, PIDTYPE_PID); 94 /* 95 * Parent thread (tgid) will be closing window when it 96 * exits. So should not get here. 97 */ 98 if (WARN_ON_ONCE(!tsk)) 99 return; 100 } 101 102 /* Return if the task is exiting. */ 103 if (tsk->flags & PF_EXITING) { 104 put_task_struct(tsk); 105 return; 106 } 107 108 use_mm(window->mm); 109 rc = copy_to_user(csb_addr, &csb, sizeof(csb)); 110 /* 111 * User space polls on csb.flags (first byte). So add barrier 112 * then copy first byte with csb flags update. 113 */ 114 if (!rc) { 115 csb.flags = CSB_V; 116 /* Make sure update to csb.flags is visible now */ 117 smp_mb(); 118 rc = copy_to_user(csb_addr, &csb, sizeof(u8)); 119 } 120 unuse_mm(window->mm); 121 put_task_struct(tsk); 122 123 /* Success */ 124 if (!rc) 125 return; 126 127 pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", 128 csb_addr, pid_vnr(pid)); 129 130 clear_siginfo(&info); 131 info.si_signo = SIGSEGV; 132 info.si_errno = EFAULT; 133 info.si_code = SEGV_MAPERR; 134 info.si_addr = csb_addr; 135 136 /* 137 * process will be polling on csb.flags after request is sent to 138 * NX. So generally CSB update should not fail except when an 139 * application passes invalid csb_addr. So an error message will 140 * be displayed and leave it to user space whether to ignore or 141 * handle this signal. 142 */ 143 rcu_read_lock(); 144 rc = kill_pid_info(SIGSEGV, &info, pid); 145 rcu_read_unlock(); 146 147 pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, 148 pid_vnr(pid), rc); 149 } 150 151 /* 152 * Process valid CRBs in fault FIFO. 153 * NX process user space requests, return credit and update the status 154 * in CRB. If it encounters transalation error when accessing CRB or 155 * request buffers, raises interrupt on the CPU to handle the fault. 156 * It takes credit on fault window, updates nx_fault_stamp in CRB with 157 * the following information and pastes CRB in fault FIFO. 158 * 159 * pswid - window ID of the window on which the request is sent. 160 * fault_storage_addr - fault address 161 * 162 * It can raise a single interrupt for multiple faults. Expects OS to 163 * process all valid faults and return credit for each fault on user 164 * space and fault windows. This fault FIFO control will be done with 165 * credit mechanism. NX can continuously paste CRBs until credits are not 166 * available on fault window. Otherwise, returns with RMA_reject. 167 * 168 * Total credits available on fault window: FIFO_SIZE(4MB)/CRBS_SIZE(128) 169 * 170 */ 171 irqreturn_t vas_fault_thread_fn(int irq, void *data) 172 { 173 struct vas_instance *vinst = data; 174 struct coprocessor_request_block *crb, *entry; 175 struct coprocessor_request_block buf; 176 struct vas_window *window; 177 unsigned long flags; 178 void *fifo; 179 180 crb = &buf; 181 182 /* 183 * VAS can interrupt with multiple page faults. So process all 184 * valid CRBs within fault FIFO until reaches invalid CRB. 185 * We use CCW[0] and pswid to validate validate CRBs: 186 * 187 * CCW[0] Reserved bit. When NX pastes CRB, CCW[0]=0 188 * OS sets this bit to 1 after reading CRB. 189 * pswid NX assigns window ID. Set pswid to -1 after 190 * reading CRB from fault FIFO. 191 * 192 * We exit this function if no valid CRBs are available to process. 193 * So acquire fault_lock and reset fifo_in_progress to 0 before 194 * exit. 195 * In case kernel receives another interrupt with different page 196 * fault, interrupt handler returns with IRQ_HANDLED if 197 * fifo_in_progress is set. Means these new faults will be 198 * handled by the current thread. Otherwise set fifo_in_progress 199 * and return IRQ_WAKE_THREAD to wake up thread. 200 */ 201 while (true) { 202 spin_lock_irqsave(&vinst->fault_lock, flags); 203 /* 204 * Advance the fault fifo pointer to next CRB. 205 * Use CRB_SIZE rather than sizeof(*crb) since the latter is 206 * aligned to CRB_ALIGN (256) but the CRB written to by VAS is 207 * only CRB_SIZE in len. 208 */ 209 fifo = vinst->fault_fifo + (vinst->fault_crbs * CRB_SIZE); 210 entry = fifo; 211 212 if ((entry->stamp.nx.pswid == cpu_to_be32(FIFO_INVALID_ENTRY)) 213 || (entry->ccw & cpu_to_be32(CCW0_INVALID))) { 214 vinst->fifo_in_progress = 0; 215 spin_unlock_irqrestore(&vinst->fault_lock, flags); 216 return IRQ_HANDLED; 217 } 218 219 spin_unlock_irqrestore(&vinst->fault_lock, flags); 220 vinst->fault_crbs++; 221 if (vinst->fault_crbs == (vinst->fault_fifo_size / CRB_SIZE)) 222 vinst->fault_crbs = 0; 223 224 memcpy(crb, fifo, CRB_SIZE); 225 entry->stamp.nx.pswid = cpu_to_be32(FIFO_INVALID_ENTRY); 226 entry->ccw |= cpu_to_be32(CCW0_INVALID); 227 228 pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d\n", 229 vinst->vas_id, vinst->fault_fifo, fifo, 230 vinst->fault_crbs); 231 232 window = vas_pswid_to_window(vinst, 233 be32_to_cpu(crb->stamp.nx.pswid)); 234 235 if (IS_ERR(window)) { 236 /* 237 * We got an interrupt about a specific send 238 * window but we can't find that window and we can't 239 * even clean it up (return credit on user space 240 * window). 241 * But we should not get here. 242 * TODO: Disable IRQ. 243 */ 244 pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, fault_crbs %d bad CRB?\n", 245 vinst->vas_id, vinst->fault_fifo, fifo, 246 be32_to_cpu(crb->stamp.nx.pswid), 247 vinst->fault_crbs); 248 249 WARN_ON_ONCE(1); 250 } else { 251 update_csb(window, crb); 252 } 253 } 254 } 255 256 irqreturn_t vas_fault_handler(int irq, void *dev_id) 257 { 258 struct vas_instance *vinst = dev_id; 259 irqreturn_t ret = IRQ_WAKE_THREAD; 260 unsigned long flags; 261 262 /* 263 * NX can generate an interrupt for multiple faults. So the 264 * fault handler thread process all CRBs until finds invalid 265 * entry. In case if NX sees continuous faults, it is possible 266 * that the thread function entered with the first interrupt 267 * can execute and process all valid CRBs. 268 * So wake up thread only if the fault thread is not in progress. 269 */ 270 spin_lock_irqsave(&vinst->fault_lock, flags); 271 272 if (vinst->fifo_in_progress) 273 ret = IRQ_HANDLED; 274 else 275 vinst->fifo_in_progress = 1; 276 277 spin_unlock_irqrestore(&vinst->fault_lock, flags); 278 279 return ret; 280 } 281 282 /* 283 * Fault window is opened per VAS instance. NX pastes fault CRB in fault 284 * FIFO upon page faults. 285 */ 286 int vas_setup_fault_window(struct vas_instance *vinst) 287 { 288 struct vas_rx_win_attr attr; 289 290 vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE; 291 vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL); 292 if (!vinst->fault_fifo) { 293 pr_err("Unable to alloc %d bytes for fault_fifo\n", 294 vinst->fault_fifo_size); 295 return -ENOMEM; 296 } 297 298 /* 299 * Invalidate all CRB entries. NX pastes valid entry for each fault. 300 */ 301 memset(vinst->fault_fifo, FIFO_INVALID_ENTRY, vinst->fault_fifo_size); 302 vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT); 303 304 attr.rx_fifo_size = vinst->fault_fifo_size; 305 attr.rx_fifo = vinst->fault_fifo; 306 307 /* 308 * Max creds is based on number of CRBs can fit in the FIFO. 309 * (fault_fifo_size/CRB_SIZE). If 8MB FIFO is used, max creds 310 * will be 0xffff since the receive creds field is 16bits wide. 311 */ 312 attr.wcreds_max = vinst->fault_fifo_size / CRB_SIZE; 313 attr.lnotify_lpid = 0; 314 attr.lnotify_pid = mfspr(SPRN_PID); 315 attr.lnotify_tid = mfspr(SPRN_PID); 316 317 vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, 318 &attr); 319 320 if (IS_ERR(vinst->fault_win)) { 321 pr_err("VAS: Error %ld opening FaultWin\n", 322 PTR_ERR(vinst->fault_win)); 323 kfree(vinst->fault_fifo); 324 return PTR_ERR(vinst->fault_win); 325 } 326 327 pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n", 328 vinst->fault_win->winid, attr.lnotify_lpid, 329 attr.lnotify_pid, attr.lnotify_tid); 330 331 return 0; 332 } 333