10d17de03SHaren Myneni // SPDX-License-Identifier: GPL-2.0+ 20d17de03SHaren Myneni /* 30d17de03SHaren Myneni * VAS Fault handling. 40d17de03SHaren Myneni * Copyright 2019, IBM Corporation 50d17de03SHaren Myneni */ 60d17de03SHaren Myneni 70d17de03SHaren Myneni #define pr_fmt(fmt) "vas: " fmt 80d17de03SHaren Myneni 90d17de03SHaren Myneni #include <linux/kernel.h> 100d17de03SHaren Myneni #include <linux/types.h> 110d17de03SHaren Myneni #include <linux/slab.h> 120d17de03SHaren Myneni #include <linux/uaccess.h> 130d17de03SHaren Myneni #include <linux/kthread.h> 14c96c4436SHaren Myneni #include <linux/sched/signal.h> 159774628aSHaren Myneni #include <linux/mmu_context.h> 160d17de03SHaren Myneni #include <asm/icswx.h> 170d17de03SHaren Myneni 180d17de03SHaren Myneni #include "vas.h" 190d17de03SHaren Myneni 200d17de03SHaren Myneni /* 210d17de03SHaren Myneni * The maximum FIFO size for fault window can be 8MB 220d17de03SHaren Myneni * (VAS_RX_FIFO_SIZE_MAX). Using 4MB FIFO since each VAS 230d17de03SHaren Myneni * instance will be having fault window. 240d17de03SHaren Myneni * 8MB FIFO can be used if expects more faults for each VAS 250d17de03SHaren Myneni * instance. 260d17de03SHaren Myneni */ 270d17de03SHaren Myneni #define VAS_FAULT_WIN_FIFO_SIZE (4 << 20) 280d17de03SHaren Myneni 290d17de03SHaren Myneni /* 30c96c4436SHaren Myneni * Update the CSB to indicate a translation error. 31c96c4436SHaren Myneni * 32c96c4436SHaren Myneni * User space will be polling on CSB after the request is issued. 33c96c4436SHaren Myneni * If NX can handle the request without any issues, it updates CSB. 34c96c4436SHaren Myneni * Whereas if NX encounters page fault, the kernel will handle the 35c96c4436SHaren Myneni * fault and update CSB with translation error. 36c96c4436SHaren Myneni * 37c96c4436SHaren Myneni * If we are unable to update the CSB means copy_to_user failed due to 38c96c4436SHaren Myneni * invalid csb_addr, send a signal to the process. 39c96c4436SHaren Myneni */ 40c96c4436SHaren Myneni static void update_csb(struct vas_window *window, 41c96c4436SHaren Myneni struct coprocessor_request_block *crb) 42c96c4436SHaren Myneni { 43c96c4436SHaren Myneni struct coprocessor_status_block csb; 44c96c4436SHaren Myneni struct kernel_siginfo info; 45c96c4436SHaren Myneni struct task_struct *tsk; 46c96c4436SHaren Myneni void __user *csb_addr; 47c96c4436SHaren Myneni struct pid *pid; 48c96c4436SHaren Myneni int rc; 49c96c4436SHaren Myneni 50c96c4436SHaren Myneni /* 51c96c4436SHaren Myneni * NX user space windows can not be opened for task->mm=NULL 52c96c4436SHaren Myneni * and faults will not be generated for kernel requests. 53c96c4436SHaren Myneni */ 54c96c4436SHaren Myneni if (WARN_ON_ONCE(!window->mm || !window->user_win)) 55c96c4436SHaren Myneni return; 56c96c4436SHaren Myneni 57c96c4436SHaren Myneni csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); 58c96c4436SHaren Myneni 59c96c4436SHaren Myneni memset(&csb, 0, sizeof(csb)); 60c96c4436SHaren Myneni csb.cc = CSB_CC_TRANSLATION; 61c96c4436SHaren Myneni csb.ce = CSB_CE_TERMINATION; 62c96c4436SHaren Myneni csb.cs = 0; 63c96c4436SHaren Myneni csb.count = 0; 64c96c4436SHaren Myneni 65c96c4436SHaren Myneni /* 66c96c4436SHaren Myneni * NX operates and returns in BE format as defined CRB struct. 67c96c4436SHaren Myneni * So saves fault_storage_addr in BE as NX pastes in FIFO and 68c96c4436SHaren Myneni * expects user space to convert to CPU format. 69c96c4436SHaren Myneni */ 70c96c4436SHaren Myneni csb.address = crb->stamp.nx.fault_storage_addr; 71c96c4436SHaren Myneni csb.flags = 0; 72c96c4436SHaren Myneni 73c96c4436SHaren Myneni pid = window->pid; 74c96c4436SHaren Myneni tsk = get_pid_task(pid, PIDTYPE_PID); 75c96c4436SHaren Myneni /* 76c96c4436SHaren Myneni * Process closes send window after all pending NX requests are 77c96c4436SHaren Myneni * completed. In multi-thread applications, a child thread can 78c96c4436SHaren Myneni * open a window and can exit without closing it. May be some 79c96c4436SHaren Myneni * requests are pending or this window can be used by other 80c96c4436SHaren Myneni * threads later. We should handle faults if NX encounters 81c96c4436SHaren Myneni * pages faults on these requests. Update CSB with translation 82c96c4436SHaren Myneni * error and fault address. If csb_addr passed by user space is 83c96c4436SHaren Myneni * invalid, send SEGV signal to pid saved in window. If the 84c96c4436SHaren Myneni * child thread is not running, send the signal to tgid. 85c96c4436SHaren Myneni * Parent thread (tgid) will close this window upon its exit. 86c96c4436SHaren Myneni * 87c96c4436SHaren Myneni * pid and mm references are taken when window is opened by 88c96c4436SHaren Myneni * process (pid). So tgid is used only when child thread opens 89c96c4436SHaren Myneni * a window and exits without closing it. 90c96c4436SHaren Myneni */ 91c96c4436SHaren Myneni if (!tsk) { 92c96c4436SHaren Myneni pid = window->tgid; 93c96c4436SHaren Myneni tsk = get_pid_task(pid, PIDTYPE_PID); 94c96c4436SHaren Myneni /* 95c96c4436SHaren Myneni * Parent thread (tgid) will be closing window when it 96c96c4436SHaren Myneni * exits. So should not get here. 97c96c4436SHaren Myneni */ 98c96c4436SHaren Myneni if (WARN_ON_ONCE(!tsk)) 99c96c4436SHaren Myneni return; 100c96c4436SHaren Myneni } 101c96c4436SHaren Myneni 102c96c4436SHaren Myneni /* Return if the task is exiting. */ 103c96c4436SHaren Myneni if (tsk->flags & PF_EXITING) { 104c96c4436SHaren Myneni put_task_struct(tsk); 105c96c4436SHaren Myneni return; 106c96c4436SHaren Myneni } 107c96c4436SHaren Myneni 108c96c4436SHaren Myneni use_mm(window->mm); 109c96c4436SHaren Myneni rc = copy_to_user(csb_addr, &csb, sizeof(csb)); 110c96c4436SHaren Myneni /* 111c96c4436SHaren Myneni * User space polls on csb.flags (first byte). So add barrier 112c96c4436SHaren Myneni * then copy first byte with csb flags update. 113c96c4436SHaren Myneni */ 114c96c4436SHaren Myneni if (!rc) { 115c96c4436SHaren Myneni csb.flags = CSB_V; 116c96c4436SHaren Myneni /* Make sure update to csb.flags is visible now */ 117c96c4436SHaren Myneni smp_mb(); 118c96c4436SHaren Myneni rc = copy_to_user(csb_addr, &csb, sizeof(u8)); 119c96c4436SHaren Myneni } 120c96c4436SHaren Myneni unuse_mm(window->mm); 121c96c4436SHaren Myneni put_task_struct(tsk); 122c96c4436SHaren Myneni 123c96c4436SHaren Myneni /* Success */ 124c96c4436SHaren Myneni if (!rc) 125c96c4436SHaren Myneni return; 126c96c4436SHaren Myneni 127c96c4436SHaren Myneni pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", 128c96c4436SHaren Myneni csb_addr, pid_vnr(pid)); 129c96c4436SHaren Myneni 130c96c4436SHaren Myneni clear_siginfo(&info); 131c96c4436SHaren Myneni info.si_signo = SIGSEGV; 132c96c4436SHaren Myneni info.si_errno = EFAULT; 133c96c4436SHaren Myneni info.si_code = SEGV_MAPERR; 134c96c4436SHaren Myneni info.si_addr = csb_addr; 135c96c4436SHaren Myneni 136c96c4436SHaren Myneni /* 137c96c4436SHaren Myneni * process will be polling on csb.flags after request is sent to 138c96c4436SHaren Myneni * NX. So generally CSB update should not fail except when an 139c96c4436SHaren Myneni * application passes invalid csb_addr. So an error message will 140c96c4436SHaren Myneni * be displayed and leave it to user space whether to ignore or 141c96c4436SHaren Myneni * handle this signal. 142c96c4436SHaren Myneni */ 143c96c4436SHaren Myneni rcu_read_lock(); 144c96c4436SHaren Myneni rc = kill_pid_info(SIGSEGV, &info, pid); 145c96c4436SHaren Myneni rcu_read_unlock(); 146c96c4436SHaren Myneni 147c96c4436SHaren Myneni pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, 148c96c4436SHaren Myneni pid_vnr(pid), rc); 149c96c4436SHaren Myneni } 150c96c4436SHaren Myneni 151c96c4436SHaren Myneni /* 1529774628aSHaren Myneni * Process valid CRBs in fault FIFO. 1539774628aSHaren Myneni * NX process user space requests, return credit and update the status 1549774628aSHaren Myneni * in CRB. If it encounters transalation error when accessing CRB or 1559774628aSHaren Myneni * request buffers, raises interrupt on the CPU to handle the fault. 1569774628aSHaren Myneni * It takes credit on fault window, updates nx_fault_stamp in CRB with 1579774628aSHaren Myneni * the following information and pastes CRB in fault FIFO. 1589774628aSHaren Myneni * 1599774628aSHaren Myneni * pswid - window ID of the window on which the request is sent. 1609774628aSHaren Myneni * fault_storage_addr - fault address 1619774628aSHaren Myneni * 1629774628aSHaren Myneni * It can raise a single interrupt for multiple faults. Expects OS to 1639774628aSHaren Myneni * process all valid faults and return credit for each fault on user 1649774628aSHaren Myneni * space and fault windows. This fault FIFO control will be done with 1659774628aSHaren Myneni * credit mechanism. NX can continuously paste CRBs until credits are not 1669774628aSHaren Myneni * available on fault window. Otherwise, returns with RMA_reject. 1679774628aSHaren Myneni * 1689774628aSHaren Myneni * Total credits available on fault window: FIFO_SIZE(4MB)/CRBS_SIZE(128) 1699774628aSHaren Myneni * 1709774628aSHaren Myneni */ 1719774628aSHaren Myneni irqreturn_t vas_fault_thread_fn(int irq, void *data) 1729774628aSHaren Myneni { 1739774628aSHaren Myneni struct vas_instance *vinst = data; 1749774628aSHaren Myneni struct coprocessor_request_block *crb, *entry; 1759774628aSHaren Myneni struct coprocessor_request_block buf; 1769774628aSHaren Myneni struct vas_window *window; 1779774628aSHaren Myneni unsigned long flags; 1789774628aSHaren Myneni void *fifo; 1799774628aSHaren Myneni 1809774628aSHaren Myneni crb = &buf; 1819774628aSHaren Myneni 1829774628aSHaren Myneni /* 1839774628aSHaren Myneni * VAS can interrupt with multiple page faults. So process all 1849774628aSHaren Myneni * valid CRBs within fault FIFO until reaches invalid CRB. 1859774628aSHaren Myneni * We use CCW[0] and pswid to validate validate CRBs: 1869774628aSHaren Myneni * 1879774628aSHaren Myneni * CCW[0] Reserved bit. When NX pastes CRB, CCW[0]=0 1889774628aSHaren Myneni * OS sets this bit to 1 after reading CRB. 1899774628aSHaren Myneni * pswid NX assigns window ID. Set pswid to -1 after 1909774628aSHaren Myneni * reading CRB from fault FIFO. 1919774628aSHaren Myneni * 1929774628aSHaren Myneni * We exit this function if no valid CRBs are available to process. 1939774628aSHaren Myneni * So acquire fault_lock and reset fifo_in_progress to 0 before 1949774628aSHaren Myneni * exit. 1959774628aSHaren Myneni * In case kernel receives another interrupt with different page 1969774628aSHaren Myneni * fault, interrupt handler returns with IRQ_HANDLED if 1979774628aSHaren Myneni * fifo_in_progress is set. Means these new faults will be 1989774628aSHaren Myneni * handled by the current thread. Otherwise set fifo_in_progress 1999774628aSHaren Myneni * and return IRQ_WAKE_THREAD to wake up thread. 2009774628aSHaren Myneni */ 2019774628aSHaren Myneni while (true) { 2029774628aSHaren Myneni spin_lock_irqsave(&vinst->fault_lock, flags); 2039774628aSHaren Myneni /* 2049774628aSHaren Myneni * Advance the fault fifo pointer to next CRB. 2059774628aSHaren Myneni * Use CRB_SIZE rather than sizeof(*crb) since the latter is 2069774628aSHaren Myneni * aligned to CRB_ALIGN (256) but the CRB written to by VAS is 2079774628aSHaren Myneni * only CRB_SIZE in len. 2089774628aSHaren Myneni */ 2099774628aSHaren Myneni fifo = vinst->fault_fifo + (vinst->fault_crbs * CRB_SIZE); 2109774628aSHaren Myneni entry = fifo; 2119774628aSHaren Myneni 2129774628aSHaren Myneni if ((entry->stamp.nx.pswid == cpu_to_be32(FIFO_INVALID_ENTRY)) 2139774628aSHaren Myneni || (entry->ccw & cpu_to_be32(CCW0_INVALID))) { 2149774628aSHaren Myneni vinst->fifo_in_progress = 0; 2159774628aSHaren Myneni spin_unlock_irqrestore(&vinst->fault_lock, flags); 2169774628aSHaren Myneni return IRQ_HANDLED; 2179774628aSHaren Myneni } 2189774628aSHaren Myneni 2199774628aSHaren Myneni spin_unlock_irqrestore(&vinst->fault_lock, flags); 2209774628aSHaren Myneni vinst->fault_crbs++; 2219774628aSHaren Myneni if (vinst->fault_crbs == (vinst->fault_fifo_size / CRB_SIZE)) 2229774628aSHaren Myneni vinst->fault_crbs = 0; 2239774628aSHaren Myneni 2249774628aSHaren Myneni memcpy(crb, fifo, CRB_SIZE); 2259774628aSHaren Myneni entry->stamp.nx.pswid = cpu_to_be32(FIFO_INVALID_ENTRY); 2269774628aSHaren Myneni entry->ccw |= cpu_to_be32(CCW0_INVALID); 2279774628aSHaren Myneni 2289774628aSHaren Myneni pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d\n", 2299774628aSHaren Myneni vinst->vas_id, vinst->fault_fifo, fifo, 2309774628aSHaren Myneni vinst->fault_crbs); 2319774628aSHaren Myneni 2329774628aSHaren Myneni window = vas_pswid_to_window(vinst, 2339774628aSHaren Myneni be32_to_cpu(crb->stamp.nx.pswid)); 2349774628aSHaren Myneni 2359774628aSHaren Myneni if (IS_ERR(window)) { 2369774628aSHaren Myneni /* 2379774628aSHaren Myneni * We got an interrupt about a specific send 2389774628aSHaren Myneni * window but we can't find that window and we can't 2399774628aSHaren Myneni * even clean it up (return credit on user space 2409774628aSHaren Myneni * window). 2419774628aSHaren Myneni * But we should not get here. 2429774628aSHaren Myneni * TODO: Disable IRQ. 2439774628aSHaren Myneni */ 2449774628aSHaren Myneni pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, fault_crbs %d bad CRB?\n", 2459774628aSHaren Myneni vinst->vas_id, vinst->fault_fifo, fifo, 2469774628aSHaren Myneni be32_to_cpu(crb->stamp.nx.pswid), 2479774628aSHaren Myneni vinst->fault_crbs); 2489774628aSHaren Myneni 2499774628aSHaren Myneni WARN_ON_ONCE(1); 250c96c4436SHaren Myneni } else { 251c96c4436SHaren Myneni update_csb(window, crb); 2529774628aSHaren Myneni } 2539774628aSHaren Myneni } 2549774628aSHaren Myneni } 2559774628aSHaren Myneni 2569774628aSHaren Myneni irqreturn_t vas_fault_handler(int irq, void *dev_id) 2579774628aSHaren Myneni { 2589774628aSHaren Myneni struct vas_instance *vinst = dev_id; 2599774628aSHaren Myneni irqreturn_t ret = IRQ_WAKE_THREAD; 2609774628aSHaren Myneni unsigned long flags; 2619774628aSHaren Myneni 2629774628aSHaren Myneni /* 2639774628aSHaren Myneni * NX can generate an interrupt for multiple faults. So the 2649774628aSHaren Myneni * fault handler thread process all CRBs until finds invalid 2659774628aSHaren Myneni * entry. In case if NX sees continuous faults, it is possible 2669774628aSHaren Myneni * that the thread function entered with the first interrupt 2679774628aSHaren Myneni * can execute and process all valid CRBs. 2689774628aSHaren Myneni * So wake up thread only if the fault thread is not in progress. 2699774628aSHaren Myneni */ 2709774628aSHaren Myneni spin_lock_irqsave(&vinst->fault_lock, flags); 2719774628aSHaren Myneni 2729774628aSHaren Myneni if (vinst->fifo_in_progress) 2739774628aSHaren Myneni ret = IRQ_HANDLED; 2749774628aSHaren Myneni else 2759774628aSHaren Myneni vinst->fifo_in_progress = 1; 2769774628aSHaren Myneni 2779774628aSHaren Myneni spin_unlock_irqrestore(&vinst->fault_lock, flags); 2789774628aSHaren Myneni 2799774628aSHaren Myneni return ret; 2809774628aSHaren Myneni } 2819774628aSHaren Myneni 2829774628aSHaren Myneni /* 2830d17de03SHaren Myneni * Fault window is opened per VAS instance. NX pastes fault CRB in fault 2840d17de03SHaren Myneni * FIFO upon page faults. 2850d17de03SHaren Myneni */ 2860d17de03SHaren Myneni int vas_setup_fault_window(struct vas_instance *vinst) 2870d17de03SHaren Myneni { 2880d17de03SHaren Myneni struct vas_rx_win_attr attr; 2890d17de03SHaren Myneni 2900d17de03SHaren Myneni vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE; 2910d17de03SHaren Myneni vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL); 2920d17de03SHaren Myneni if (!vinst->fault_fifo) { 2930d17de03SHaren Myneni pr_err("Unable to alloc %d bytes for fault_fifo\n", 2940d17de03SHaren Myneni vinst->fault_fifo_size); 2950d17de03SHaren Myneni return -ENOMEM; 2960d17de03SHaren Myneni } 2970d17de03SHaren Myneni 2980d17de03SHaren Myneni /* 2990d17de03SHaren Myneni * Invalidate all CRB entries. NX pastes valid entry for each fault. 3000d17de03SHaren Myneni */ 3010d17de03SHaren Myneni memset(vinst->fault_fifo, FIFO_INVALID_ENTRY, vinst->fault_fifo_size); 3020d17de03SHaren Myneni vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT); 3030d17de03SHaren Myneni 3040d17de03SHaren Myneni attr.rx_fifo_size = vinst->fault_fifo_size; 3050d17de03SHaren Myneni attr.rx_fifo = vinst->fault_fifo; 3060d17de03SHaren Myneni 3070d17de03SHaren Myneni /* 3080d17de03SHaren Myneni * Max creds is based on number of CRBs can fit in the FIFO. 3090d17de03SHaren Myneni * (fault_fifo_size/CRB_SIZE). If 8MB FIFO is used, max creds 3100d17de03SHaren Myneni * will be 0xffff since the receive creds field is 16bits wide. 3110d17de03SHaren Myneni */ 3120d17de03SHaren Myneni attr.wcreds_max = vinst->fault_fifo_size / CRB_SIZE; 3130d17de03SHaren Myneni attr.lnotify_lpid = 0; 3140d17de03SHaren Myneni attr.lnotify_pid = mfspr(SPRN_PID); 3150d17de03SHaren Myneni attr.lnotify_tid = mfspr(SPRN_PID); 3160d17de03SHaren Myneni 3170d17de03SHaren Myneni vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, 3180d17de03SHaren Myneni &attr); 3190d17de03SHaren Myneni 3200d17de03SHaren Myneni if (IS_ERR(vinst->fault_win)) { 3210d17de03SHaren Myneni pr_err("VAS: Error %ld opening FaultWin\n", 3220d17de03SHaren Myneni PTR_ERR(vinst->fault_win)); 3230d17de03SHaren Myneni kfree(vinst->fault_fifo); 3240d17de03SHaren Myneni return PTR_ERR(vinst->fault_win); 3250d17de03SHaren Myneni } 3260d17de03SHaren Myneni 3270d17de03SHaren Myneni pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n", 3280d17de03SHaren Myneni vinst->fault_win->winid, attr.lnotify_lpid, 3290d17de03SHaren Myneni attr.lnotify_pid, attr.lnotify_tid); 3300d17de03SHaren Myneni 3310d17de03SHaren Myneni return 0; 3320d17de03SHaren Myneni } 333