1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright 2020-21 IBM Corp.
4  */
5 
6 #define pr_fmt(fmt) "vas: " fmt
7 
8 #include <linux/module.h>
9 #include <linux/kernel.h>
10 #include <linux/export.h>
11 #include <linux/types.h>
12 #include <linux/delay.h>
13 #include <linux/slab.h>
14 #include <linux/interrupt.h>
15 #include <linux/irqdomain.h>
16 #include <asm/machdep.h>
17 #include <asm/hvcall.h>
18 #include <asm/plpar_wrappers.h>
19 #include <asm/vas.h>
20 #include "vas.h"
21 
/* Paste address value that H_ALLOCATE_VAS_WINDOW handling treats as "no COPY/PASTE" */
#define VAS_INVALID_WIN_ADDRESS	0xFFFFFFFFFFFFFFFFul
/* Domain value (-1) that lets the hypervisor select the VAS instance */
#define VAS_DEFAULT_DOMAIN_ID	0xFFFFFFFFFFFFFFFFul
/* The hypervisor allows one credit per window right now */
#define DEF_WIN_CREDS		1

/* Overall VAS capabilities, filled in by pseries_vas_init() */
static struct vas_all_caps caps_all;
/* Set once get_vas_capabilities() succeeds; gates API registration */
static bool copypaste_feat;

/* Per feature type capabilities and the list of windows opened with them */
static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
/* Serializes window allocate/deallocate hcalls, IRQ setup and the window lists */
static DEFINE_MUTEX(vas_pseries_mutex);
32 
33 static long hcall_return_busy_check(long rc)
34 {
35 	/* Check if we are stalled for some time */
36 	if (H_IS_LONG_BUSY(rc)) {
37 		msleep(get_longbusy_msecs(rc));
38 		rc = H_BUSY;
39 	} else if (rc == H_BUSY) {
40 		cond_resched();
41 	}
42 
43 	return rc;
44 }
45 
46 /*
47  * Allocate VAS window hcall
48  */
49 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
50 				     u8 wintype, u16 credits)
51 {
52 	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
53 	long rc;
54 
55 	do {
56 		rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
57 				  credits, domain[0], domain[1], domain[2],
58 				  domain[3], domain[4], domain[5]);
59 
60 		rc = hcall_return_busy_check(rc);
61 	} while (rc == H_BUSY);
62 
63 	if (rc == H_SUCCESS) {
64 		if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
65 			pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
66 			return -ENOTSUPP;
67 		}
68 		win->vas_win.winid = retbuf[0];
69 		win->win_addr = retbuf[1];
70 		win->complete_irq = retbuf[2];
71 		win->fault_irq = retbuf[3];
72 		return 0;
73 	}
74 
75 	pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
76 		rc, wintype, credits);
77 
78 	return -EIO;
79 }
80 
81 /*
82  * Deallocate VAS window hcall.
83  */
84 static int h_deallocate_vas_window(u64 winid)
85 {
86 	long rc;
87 
88 	do {
89 		rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
90 
91 		rc = hcall_return_busy_check(rc);
92 	} while (rc == H_BUSY);
93 
94 	if (rc == H_SUCCESS)
95 		return 0;
96 
97 	pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
98 		rc, winid);
99 	return -EIO;
100 }
101 
102 /*
103  * Modify VAS window.
104  * After the window is opened with allocate window hcall, configure it
105  * with flags and LPAR PID before using.
106  */
107 static int h_modify_vas_window(struct pseries_vas_window *win)
108 {
109 	long rc;
110 	u32 lpid = mfspr(SPRN_PID);
111 
112 	/*
113 	 * AMR value is not supported in Linux VAS implementation.
114 	 * The hypervisor ignores it if 0 is passed.
115 	 */
116 	do {
117 		rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
118 					win->vas_win.winid, lpid, 0,
119 					VAS_MOD_WIN_FLAGS, 0);
120 
121 		rc = hcall_return_busy_check(rc);
122 	} while (rc == H_BUSY);
123 
124 	if (rc == H_SUCCESS)
125 		return 0;
126 
127 	pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n",
128 			rc, win->vas_win.winid, lpid);
129 	return -EIO;
130 }
131 
132 /*
133  * This hcall is used to determine the capabilities from the hypervisor.
134  * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
135  * @query_type: If 0 is passed, the hypervisor returns the overall
136  *		capabilities which provides all feature(s) that are
137  *		available. Then query the hypervisor to get the
138  *		corresponding capabilities for the specific feature.
139  *		Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
140  *			and VAS GZIP Default capabilities.
141  *			H_QUERY_NX_CAPABILITIES provides NX GZIP
142  *			capabilities.
143  * @result: Return buffer to save capabilities.
144  */
145 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
146 {
147 	long rc;
148 
149 	rc = plpar_hcall_norets(hcall, query_type, result);
150 
151 	if (rc == H_SUCCESS)
152 		return 0;
153 
154 	/* H_FUNCTION means HV does not support VAS so don't print an error */
155 	if (rc != H_FUNCTION) {
156 		pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
157 			(hcall == H_QUERY_VAS_CAPABILITIES) ?
158 				"H_QUERY_VAS_CAPABILITIES" :
159 				"H_QUERY_NX_CAPABILITIES",
160 			rc, query_type, result);
161 	}
162 
163 	return -EIO;
164 }
165 EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
166 
167 /*
168  * hcall to get fault CRB from the hypervisor.
169  */
170 static int h_get_nx_fault(u32 winid, u64 buffer)
171 {
172 	long rc;
173 
174 	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
175 
176 	if (rc == H_SUCCESS)
177 		return 0;
178 
179 	pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
180 		rc, winid, buffer);
181 	return -EIO;
182 
183 }
184 
185 /*
186  * Handle the fault interrupt.
187  * When the fault interrupt is received for each window, query the
188  * hypervisor to get the fault CRB on the specific fault. Then
189  * process the CRB by updating CSB or send signal if the user space
190  * CSB is invalid.
191  * Note: The hypervisor forwards an interrupt for each fault request.
192  *	So one fault CRB to process for each H_GET_NX_FAULT hcall.
193  */
194 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
195 {
196 	struct pseries_vas_window *txwin = data;
197 	struct coprocessor_request_block crb;
198 	struct vas_user_win_ref *tsk_ref;
199 	int rc;
200 
201 	rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
202 	if (!rc) {
203 		tsk_ref = &txwin->vas_win.task_ref;
204 		vas_dump_crb(&crb);
205 		vas_update_csb(&crb, tsk_ref);
206 	}
207 
208 	return IRQ_HANDLED;
209 }
210 
211 /*
212  * Allocate window and setup IRQ mapping.
213  */
214 static int allocate_setup_window(struct pseries_vas_window *txwin,
215 				 u64 *domain, u8 wintype)
216 {
217 	int rc;
218 
219 	rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
220 	if (rc)
221 		return rc;
222 	/*
223 	 * On PowerVM, the hypervisor setup and forwards the fault
224 	 * interrupt per window. So the IRQ setup and fault handling
225 	 * will be done for each open window separately.
226 	 */
227 	txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
228 	if (!txwin->fault_virq) {
229 		pr_err("Failed irq mapping %d\n", txwin->fault_irq);
230 		rc = -EINVAL;
231 		goto out_win;
232 	}
233 
234 	txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
235 				txwin->vas_win.winid);
236 	if (!txwin->name) {
237 		rc = -ENOMEM;
238 		goto out_irq;
239 	}
240 
241 	rc = request_threaded_irq(txwin->fault_virq, NULL,
242 				  pseries_vas_fault_thread_fn, IRQF_ONESHOT,
243 				  txwin->name, txwin);
244 	if (rc) {
245 		pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
246 		       txwin->vas_win.winid, txwin->fault_virq, rc);
247 		goto out_free;
248 	}
249 
250 	txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
251 
252 	return 0;
253 out_free:
254 	kfree(txwin->name);
255 out_irq:
256 	irq_dispose_mapping(txwin->fault_virq);
257 out_win:
258 	h_deallocate_vas_window(txwin->vas_win.winid);
259 	return rc;
260 }
261 
262 static inline void free_irq_setup(struct pseries_vas_window *txwin)
263 {
264 	free_irq(txwin->fault_virq, txwin);
265 	kfree(txwin->name);
266 	irq_dispose_mapping(txwin->fault_virq);
267 }
268 
269 static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
270 					      enum vas_cop_type cop_type)
271 {
272 	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
273 	struct vas_cop_feat_caps *cop_feat_caps;
274 	struct vas_caps *caps;
275 	struct pseries_vas_window *txwin;
276 	int rc;
277 
278 	txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
279 	if (!txwin)
280 		return ERR_PTR(-ENOMEM);
281 
282 	/*
283 	 * A VAS window can have many credits which means that many
284 	 * requests can be issued simultaneously. But the hypervisor
285 	 * restricts one credit per window.
286 	 * The hypervisor introduces 2 different types of credits:
287 	 * Default credit type (Uses normal priority FIFO):
288 	 *	A limited number of credits are assigned to partitions
289 	 *	based on processor entitlement. But these credits may be
290 	 *	over-committed on a system depends on whether the CPUs
291 	 *	are in shared or dedicated modes - that is, more requests
292 	 *	may be issued across the system than NX can service at
293 	 *	once which can result in paste command failure (RMA_busy).
294 	 *	Then the process has to resend requests or fall-back to
295 	 *	SW compression.
296 	 * Quality of Service (QoS) credit type (Uses high priority FIFO):
297 	 *	To avoid NX HW contention, the system admins can assign
298 	 *	QoS credits for each LPAR so that this partition is
299 	 *	guaranteed access to NX resources. These credits are
300 	 *	assigned to partitions via the HMC.
301 	 *	Refer PAPR for more information.
302 	 *
303 	 * Allocate window with QoS credits if user requested. Otherwise
304 	 * default credits are used.
305 	 */
306 	if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
307 		caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
308 	else
309 		caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
310 
311 	cop_feat_caps = &caps->caps;
312 
313 	if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) >
314 			atomic_read(&cop_feat_caps->target_lpar_creds)) {
315 		pr_err("Credits are not available to allocate window\n");
316 		rc = -EINVAL;
317 		goto out;
318 	}
319 
320 	if (vas_id == -1) {
321 		/*
322 		 * The user space is requesting to allocate a window on
323 		 * a VAS instance where the process is executing.
324 		 * On PowerVM, domain values are passed to the hypervisor
325 		 * to select VAS instance. Useful if the process is
326 		 * affinity to NUMA node.
327 		 * The hypervisor selects VAS instance if
328 		 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
329 		 * The h_allocate_vas_window hcall is defined to take a
330 		 * domain values as specified by h_home_node_associativity,
331 		 * So no unpacking needs to be done.
332 		 */
333 		rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
334 				  VPHN_FLAG_VCPU, smp_processor_id());
335 		if (rc != H_SUCCESS) {
336 			pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
337 			goto out;
338 		}
339 	}
340 
341 	/*
342 	 * Allocate / Deallocate window hcalls and setup / free IRQs
343 	 * have to be protected with mutex.
344 	 * Open VAS window: Allocate window hcall and setup IRQ
345 	 * Close VAS window: Deallocate window hcall and free IRQ
346 	 *	The hypervisor waits until all NX requests are
347 	 *	completed before closing the window. So expects OS
348 	 *	to handle NX faults, means IRQ can be freed only
349 	 *	after the deallocate window hcall is returned.
350 	 * So once the window is closed with deallocate hcall before
351 	 * the IRQ is freed, it can be assigned to new allocate
352 	 * hcall with the same fault IRQ by the hypervisor. It can
353 	 * result in setup IRQ fail for the new window since the
354 	 * same fault IRQ is not freed by the OS before.
355 	 */
356 	mutex_lock(&vas_pseries_mutex);
357 	rc = allocate_setup_window(txwin, (u64 *)&domain[0],
358 				   cop_feat_caps->win_type);
359 	mutex_unlock(&vas_pseries_mutex);
360 	if (rc)
361 		goto out;
362 
363 	/*
364 	 * Modify window and it is ready to use.
365 	 */
366 	rc = h_modify_vas_window(txwin);
367 	if (!rc)
368 		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
369 	if (rc)
370 		goto out_free;
371 
372 	vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
373 	txwin->win_type = cop_feat_caps->win_type;
374 	mutex_lock(&vas_pseries_mutex);
375 	list_add(&txwin->win_list, &caps->list);
376 	mutex_unlock(&vas_pseries_mutex);
377 
378 	return &txwin->vas_win;
379 
380 out_free:
381 	/*
382 	 * Window is not operational. Free IRQ before closing
383 	 * window so that do not have to hold mutex.
384 	 */
385 	free_irq_setup(txwin);
386 	h_deallocate_vas_window(txwin->vas_win.winid);
387 out:
388 	atomic_dec(&cop_feat_caps->used_lpar_creds);
389 	kfree(txwin);
390 	return ERR_PTR(rc);
391 }
392 
393 static u64 vas_paste_address(struct vas_window *vwin)
394 {
395 	struct pseries_vas_window *win;
396 
397 	win = container_of(vwin, struct pseries_vas_window, vas_win);
398 	return win->win_addr;
399 }
400 
401 static int deallocate_free_window(struct pseries_vas_window *win)
402 {
403 	int rc = 0;
404 
405 	/*
406 	 * The hypervisor waits for all requests including faults
407 	 * are processed before closing the window - Means all
408 	 * credits have to be returned. In the case of fault
409 	 * request, a credit is returned after OS issues
410 	 * H_GET_NX_FAULT hcall.
411 	 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
412 	 * hcall.
413 	 */
414 	rc = h_deallocate_vas_window(win->vas_win.winid);
415 	if (!rc)
416 		free_irq_setup(win);
417 
418 	return rc;
419 }
420 
421 static int vas_deallocate_window(struct vas_window *vwin)
422 {
423 	struct pseries_vas_window *win;
424 	struct vas_cop_feat_caps *caps;
425 	int rc = 0;
426 
427 	if (!vwin)
428 		return -EINVAL;
429 
430 	win = container_of(vwin, struct pseries_vas_window, vas_win);
431 
432 	/* Should not happen */
433 	if (win->win_type >= VAS_MAX_FEAT_TYPE) {
434 		pr_err("Window (%u): Invalid window type %u\n",
435 				vwin->winid, win->win_type);
436 		return -EINVAL;
437 	}
438 
439 	caps = &vascaps[win->win_type].caps;
440 	mutex_lock(&vas_pseries_mutex);
441 	rc = deallocate_free_window(win);
442 	if (rc) {
443 		mutex_unlock(&vas_pseries_mutex);
444 		return rc;
445 	}
446 
447 	list_del(&win->win_list);
448 	atomic_dec(&caps->used_lpar_creds);
449 	mutex_unlock(&vas_pseries_mutex);
450 
451 	put_vas_user_win_ref(&vwin->task_ref);
452 	mm_context_remove_vas_window(vwin->task_ref.mm);
453 
454 	kfree(win);
455 	return 0;
456 }
457 
458 static const struct vas_user_win_ops vops_pseries = {
459 	.open_win	= vas_allocate_window,	/* Open and configure window */
460 	.paste_addr	= vas_paste_address,	/* To do copy/paste */
461 	.close_win	= vas_deallocate_window, /* Close window */
462 };
463 
464 /*
465  * Supporting only nx-gzip coprocessor type now, but this API code
466  * extended to other coprocessor types later.
467  */
468 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
469 			     const char *name)
470 {
471 	int rc;
472 
473 	if (!copypaste_feat)
474 		return -ENOTSUPP;
475 
476 	rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
477 
478 	return rc;
479 }
480 EXPORT_SYMBOL_GPL(vas_register_api_pseries);
481 
/*
 * Counterpart of vas_register_api_pseries(): forwards to
 * vas_unregister_coproc_api().
 */
void vas_unregister_api_pseries(void)
{
	vas_unregister_coproc_api();
}
EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
487 
488 /*
489  * Get the specific capabilities based on the feature type.
490  * Right now supports GZIP default and GZIP QoS capabilities.
491  */
492 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
493 				struct hv_vas_cop_feat_caps *hv_caps)
494 {
495 	struct vas_cop_feat_caps *caps;
496 	struct vas_caps *vcaps;
497 	int rc = 0;
498 
499 	vcaps = &vascaps[type];
500 	memset(vcaps, 0, sizeof(*vcaps));
501 	INIT_LIST_HEAD(&vcaps->list);
502 
503 	caps = &vcaps->caps;
504 
505 	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
506 					  (u64)virt_to_phys(hv_caps));
507 	if (rc)
508 		return rc;
509 
510 	caps->user_mode = hv_caps->user_mode;
511 	if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
512 		pr_err("User space COPY/PASTE is not supported\n");
513 		return -ENOTSUPP;
514 	}
515 
516 	caps->descriptor = be64_to_cpu(hv_caps->descriptor);
517 	caps->win_type = hv_caps->win_type;
518 	if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
519 		pr_err("Unsupported window type %u\n", caps->win_type);
520 		return -EINVAL;
521 	}
522 	caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
523 	caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
524 	atomic_set(&caps->target_lpar_creds,
525 		   be16_to_cpu(hv_caps->target_lpar_creds));
526 	if (feat == VAS_GZIP_DEF_FEAT) {
527 		caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
528 
529 		if (caps->max_win_creds < DEF_WIN_CREDS) {
530 			pr_err("Window creds(%u) > max allowed window creds(%u)\n",
531 			       DEF_WIN_CREDS, caps->max_win_creds);
532 			return -EINVAL;
533 		}
534 	}
535 
536 	copypaste_feat = true;
537 
538 	return 0;
539 }
540 
541 static int __init pseries_vas_init(void)
542 {
543 	struct hv_vas_cop_feat_caps *hv_cop_caps;
544 	struct hv_vas_all_caps *hv_caps;
545 	int rc;
546 
547 	/*
548 	 * Linux supports user space COPY/PASTE only with Radix
549 	 */
550 	if (!radix_enabled()) {
551 		pr_err("API is supported only with radix page tables\n");
552 		return -ENOTSUPP;
553 	}
554 
555 	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
556 	if (!hv_caps)
557 		return -ENOMEM;
558 	/*
559 	 * Get VAS overall capabilities by passing 0 to feature type.
560 	 */
561 	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
562 					  (u64)virt_to_phys(hv_caps));
563 	if (rc)
564 		goto out;
565 
566 	caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
567 	caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
568 
569 	hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL);
570 	if (!hv_cop_caps) {
571 		rc = -ENOMEM;
572 		goto out;
573 	}
574 	/*
575 	 * QOS capabilities available
576 	 */
577 	if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
578 		rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
579 					  VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps);
580 
581 		if (rc)
582 			goto out_cop;
583 	}
584 	/*
585 	 * Default capabilities available
586 	 */
587 	if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) {
588 		rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
589 					  VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps);
590 		if (rc)
591 			goto out_cop;
592 	}
593 
594 	pr_info("GZIP feature is available\n");
595 
596 out_cop:
597 	kfree(hv_cop_caps);
598 out:
599 	kfree(hv_caps);
600 	return rc;
601 }
602 machine_device_initcall(pseries, pseries_vas_init);
603