xref: /openbmc/linux/arch/s390/kvm/pv.c (revision 82806c25)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Hosting Protected Virtual Machines
4  *
5  * Copyright IBM Corp. 2019, 2020
6  *    Author(s): Janosch Frank <frankja@linux.ibm.com>
7  */
8 #include <linux/kvm.h>
9 #include <linux/kvm_host.h>
10 #include <linux/minmax.h>
11 #include <linux/pagemap.h>
12 #include <linux/sched/signal.h>
13 #include <asm/gmap.h>
14 #include <asm/uv.h>
15 #include <asm/mman.h>
16 #include <linux/pagewalk.h>
17 #include <linux/sched/mm.h>
18 #include <linux/mmu_notifier.h>
19 #include "kvm-s390.h"
20 
21 static void kvm_s390_clear_pv_state(struct kvm *kvm)
22 {
23 	kvm->arch.pv.handle = 0;
24 	kvm->arch.pv.guest_len = 0;
25 	kvm->arch.pv.stor_base = 0;
26 	kvm->arch.pv.stor_var = NULL;
27 }
28 
29 int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
30 {
31 	int cc;
32 
33 	if (!kvm_s390_pv_cpu_get_handle(vcpu))
34 		return 0;
35 
36 	cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
37 
38 	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
39 		     vcpu->vcpu_id, *rc, *rrc);
40 	WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);
41 
42 	/* Intended memory leak for something that should never happen. */
43 	if (!cc)
44 		free_pages(vcpu->arch.pv.stor_base,
45 			   get_order(uv_info.guest_cpu_stor_len));
46 
47 	free_page(sida_origin(vcpu->arch.sie_block));
48 	vcpu->arch.sie_block->pv_handle_cpu = 0;
49 	vcpu->arch.sie_block->pv_handle_config = 0;
50 	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
51 	vcpu->arch.sie_block->sdf = 0;
52 	/*
53 	 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
54 	 * Use the reset value of gbea to avoid leaking the kernel pointer of
55 	 * the just freed sida.
56 	 */
57 	vcpu->arch.sie_block->gbea = 1;
58 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
59 
60 	return cc ? EIO : 0;
61 }
62 
63 int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
64 {
65 	struct uv_cb_csc uvcb = {
66 		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
67 		.header.len = sizeof(uvcb),
68 	};
69 	int cc;
70 
71 	if (kvm_s390_pv_cpu_get_handle(vcpu))
72 		return -EINVAL;
73 
74 	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
75 						   get_order(uv_info.guest_cpu_stor_len));
76 	if (!vcpu->arch.pv.stor_base)
77 		return -ENOMEM;
78 
79 	/* Input */
80 	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
81 	uvcb.num = vcpu->arch.sie_block->icpua;
82 	uvcb.state_origin = (u64)vcpu->arch.sie_block;
83 	uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
84 
85 	/* Alloc Secure Instruction Data Area Designation */
86 	vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
87 	if (!vcpu->arch.sie_block->sidad) {
88 		free_pages(vcpu->arch.pv.stor_base,
89 			   get_order(uv_info.guest_cpu_stor_len));
90 		return -ENOMEM;
91 	}
92 
93 	cc = uv_call(0, (u64)&uvcb);
94 	*rc = uvcb.header.rc;
95 	*rrc = uvcb.header.rrc;
96 	KVM_UV_EVENT(vcpu->kvm, 3,
97 		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
98 		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
99 		     uvcb.header.rrc);
100 
101 	if (cc) {
102 		u16 dummy;
103 
104 		kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
105 		return -EIO;
106 	}
107 
108 	/* Output */
109 	vcpu->arch.pv.handle = uvcb.cpu_handle;
110 	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
111 	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
112 	vcpu->arch.sie_block->sdf = 2;
113 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
114 	return 0;
115 }
116 
117 /* only free resources when the destroy was successful */
118 static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
119 {
120 	vfree(kvm->arch.pv.stor_var);
121 	free_pages(kvm->arch.pv.stor_base,
122 		   get_order(uv_info.guest_base_stor_len));
123 	kvm_s390_clear_pv_state(kvm);
124 }
125 
126 static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
127 {
128 	unsigned long base = uv_info.guest_base_stor_len;
129 	unsigned long virt = uv_info.guest_virt_var_stor_len;
130 	unsigned long npages = 0, vlen = 0;
131 
132 	kvm->arch.pv.stor_var = NULL;
133 	kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
134 	if (!kvm->arch.pv.stor_base)
135 		return -ENOMEM;
136 
137 	/*
138 	 * Calculate current guest storage for allocation of the
139 	 * variable storage, which is based on the length in MB.
140 	 *
141 	 * Slots are sorted by GFN
142 	 */
143 	mutex_lock(&kvm->slots_lock);
144 	npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
145 	mutex_unlock(&kvm->slots_lock);
146 
147 	kvm->arch.pv.guest_len = npages * PAGE_SIZE;
148 
149 	/* Allocate variable storage */
150 	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
151 	vlen += uv_info.guest_virt_base_stor_len;
152 	kvm->arch.pv.stor_var = vzalloc(vlen);
153 	if (!kvm->arch.pv.stor_var)
154 		goto out_err;
155 	return 0;
156 
157 out_err:
158 	kvm_s390_pv_dealloc_vm(kvm);
159 	return -ENOMEM;
160 }
161 
162 /* this should not fail, but if it does, we must not free the donated memory */
163 int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
164 {
165 	int cc;
166 
167 	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
168 			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
169 	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
170 	/*
171 	 * if the mm still has a mapping, make all its pages accessible
172 	 * before destroying the guest
173 	 */
174 	if (mmget_not_zero(kvm->mm)) {
175 		s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
176 		mmput(kvm->mm);
177 	}
178 
179 	if (!cc) {
180 		atomic_dec(&kvm->mm->context.protected_count);
181 		kvm_s390_pv_dealloc_vm(kvm);
182 	} else {
183 		/* Intended memory leak on "impossible" error */
184 		s390_replace_asce(kvm->arch.gmap);
185 	}
186 	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
187 	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
188 
189 	return cc ? -EIO : 0;
190 }
191 
192 static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
193 					     struct mm_struct *mm)
194 {
195 	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
196 	u16 dummy;
197 
198 	/*
199 	 * No locking is needed since this is the last thread of the last user of this
200 	 * struct mm.
201 	 * When the struct kvm gets deinitialized, this notifier is also
202 	 * unregistered. This means that if this notifier runs, then the
203 	 * struct kvm is still valid.
204 	 */
205 	kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
206 }
207 
/*
 * MMU notifier callbacks used for protected VMs. Only the ->release hook is
 * populated; kvm_s390_pv_init_vm() also compares against the address of
 * this table to tell whether the notifier has already been set up.
 */
static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
	.release = kvm_s390_pv_mmu_notifier_release,
};
211 
212 int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
213 {
214 	struct uv_cb_cgc uvcb = {
215 		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
216 		.header.len = sizeof(uvcb)
217 	};
218 	int cc, ret;
219 	u16 dummy;
220 
221 	ret = kvm_s390_pv_alloc_vm(kvm);
222 	if (ret)
223 		return ret;
224 
225 	/* Inputs */
226 	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
227 	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
228 	uvcb.guest_asce = kvm->arch.gmap->asce;
229 	uvcb.guest_sca = (unsigned long)kvm->arch.sca;
230 	uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
231 	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
232 
233 	cc = uv_call_sched(0, (u64)&uvcb);
234 	*rc = uvcb.header.rc;
235 	*rrc = uvcb.header.rrc;
236 	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
237 		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
238 
239 	/* Outputs */
240 	kvm->arch.pv.handle = uvcb.guest_handle;
241 
242 	atomic_inc(&kvm->mm->context.protected_count);
243 	if (cc) {
244 		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
245 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
246 		} else {
247 			atomic_dec(&kvm->mm->context.protected_count);
248 			kvm_s390_pv_dealloc_vm(kvm);
249 		}
250 		return -EIO;
251 	}
252 	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
253 	/* Add the notifier only once. No races because we hold kvm->lock */
254 	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
255 		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
256 		mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
257 	}
258 	return 0;
259 }
260 
261 int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
262 			      u16 *rrc)
263 {
264 	struct uv_cb_ssc uvcb = {
265 		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
266 		.header.len = sizeof(uvcb),
267 		.sec_header_origin = (u64)hdr,
268 		.sec_header_len = length,
269 		.guest_handle = kvm_s390_pv_get_handle(kvm),
270 	};
271 	int cc = uv_call(0, (u64)&uvcb);
272 
273 	*rc = uvcb.header.rc;
274 	*rrc = uvcb.header.rrc;
275 	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
276 		     *rc, *rrc);
277 	return cc ? -EINVAL : 0;
278 }
279 
280 static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
281 		      u64 offset, u16 *rc, u16 *rrc)
282 {
283 	struct uv_cb_unp uvcb = {
284 		.header.cmd = UVC_CMD_UNPACK_IMG,
285 		.header.len = sizeof(uvcb),
286 		.guest_handle = kvm_s390_pv_get_handle(kvm),
287 		.gaddr = addr,
288 		.tweak[0] = tweak,
289 		.tweak[1] = offset,
290 	};
291 	int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
292 
293 	*rc = uvcb.header.rc;
294 	*rrc = uvcb.header.rrc;
295 
296 	if (ret && ret != -EAGAIN)
297 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
298 			     uvcb.gaddr, *rc, *rrc);
299 	return ret;
300 }
301 
302 int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
303 		       unsigned long tweak, u16 *rc, u16 *rrc)
304 {
305 	u64 offset = 0;
306 	int ret = 0;
307 
308 	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
309 		return -EINVAL;
310 
311 	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
312 		     addr, size);
313 
314 	while (offset < size) {
315 		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
316 		if (ret == -EAGAIN) {
317 			cond_resched();
318 			if (fatal_signal_pending(current))
319 				break;
320 			continue;
321 		}
322 		if (ret)
323 			break;
324 		addr += PAGE_SIZE;
325 		offset += PAGE_SIZE;
326 	}
327 	if (!ret)
328 		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
329 	return ret;
330 }
331 
332 int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
333 {
334 	struct uv_cb_cpu_set_state uvcb = {
335 		.header.cmd	= UVC_CMD_CPU_SET_STATE,
336 		.header.len	= sizeof(uvcb),
337 		.cpu_handle	= kvm_s390_pv_cpu_get_handle(vcpu),
338 		.state		= state,
339 	};
340 	int cc;
341 
342 	cc = uv_call(0, (u64)&uvcb);
343 	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
344 		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
345 	if (cc)
346 		return -EINVAL;
347 	return 0;
348 }
349 
350 int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
351 {
352 	struct uv_cb_dump_cpu uvcb = {
353 		.header.cmd = UVC_CMD_DUMP_CPU,
354 		.header.len = sizeof(uvcb),
355 		.cpu_handle = vcpu->arch.pv.handle,
356 		.dump_area_origin = (u64)buff,
357 	};
358 	int cc;
359 
360 	cc = uv_call_sched(0, (u64)&uvcb);
361 	*rc = uvcb.header.rc;
362 	*rrc = uvcb.header.rrc;
363 	return cc;
364 }
365 
366 /* Size of the cache for the storage state dump data. 1MB for now */
367 #define DUMP_BUFF_LEN HPAGE_SIZE
368 
369 /**
370  * kvm_s390_pv_dump_stor_state
371  *
372  * @kvm: pointer to the guest's KVM struct
373  * @buff_user: Userspace pointer where we will write the results to
374  * @gaddr: Starting absolute guest address for which the storage state
375  *	   is requested.
376  * @buff_user_len: Length of the buff_user buffer
377  * @rc: Pointer to where the uvcb return code is stored
378  * @rrc: Pointer to where the uvcb return reason code is stored
379  *
380  * Stores buff_len bytes of tweak component values to buff_user
381  * starting with the 1MB block specified by the absolute guest address
382  * (gaddr). The gaddr pointer will be updated with the last address
383  * for which data was written when returning to userspace. buff_user
384  * might be written to even if an error rc is returned. For instance
385  * if we encounter a fault after writing the first page of data.
386  *
387  * Context: kvm->lock needs to be held
388  *
389  * Return:
390  *  0 on success
391  *  -ENOMEM if allocating the cache fails
392  *  -EINVAL if gaddr is not aligned to 1MB
393  *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
394  *  -EINVAL if the UV call fails, rc and rrc will be set in this case
395  *  -EFAULT if copying the result to buff_user failed
396  */
397 int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
398 				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
399 {
400 	struct uv_cb_dump_stor_state uvcb = {
401 		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
402 		.header.len = sizeof(uvcb),
403 		.config_handle = kvm->arch.pv.handle,
404 		.gaddr = *gaddr,
405 		.dump_area_origin = 0,
406 	};
407 	const u64 increment_len = uv_info.conf_dump_storage_state_len;
408 	size_t buff_kvm_size;
409 	size_t size_done = 0;
410 	u8 *buff_kvm = NULL;
411 	int cc, ret;
412 
413 	ret = -EINVAL;
414 	/* UV call processes 1MB guest storage chunks at a time */
415 	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
416 		goto out;
417 
418 	/*
419 	 * We provide the storage state for 1MB chunks of guest
420 	 * storage. The buffer will need to be aligned to
421 	 * conf_dump_storage_state_len so we don't end on a partial
422 	 * chunk.
423 	 */
424 	if (!buff_user_len ||
425 	    !IS_ALIGNED(buff_user_len, increment_len))
426 		goto out;
427 
428 	/*
429 	 * Allocate a buffer from which we will later copy to the user
430 	 * process. We don't want userspace to dictate our buffer size
431 	 * so we limit it to DUMP_BUFF_LEN.
432 	 */
433 	ret = -ENOMEM;
434 	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
435 	buff_kvm = vzalloc(buff_kvm_size);
436 	if (!buff_kvm)
437 		goto out;
438 
439 	ret = 0;
440 	uvcb.dump_area_origin = (u64)buff_kvm;
441 	/* We will loop until the user buffer is filled or an error occurs */
442 	do {
443 		/* Get 1MB worth of guest storage state data */
444 		cc = uv_call_sched(0, (u64)&uvcb);
445 
446 		/* All or nothing */
447 		if (cc) {
448 			ret = -EINVAL;
449 			break;
450 		}
451 
452 		size_done += increment_len;
453 		uvcb.dump_area_origin += increment_len;
454 		buff_user_len -= increment_len;
455 		uvcb.gaddr += HPAGE_SIZE;
456 
457 		/* KVM Buffer full, time to copy to the process */
458 		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
459 			if (copy_to_user(buff_user, buff_kvm, size_done)) {
460 				ret = -EFAULT;
461 				break;
462 			}
463 
464 			buff_user += size_done;
465 			size_done = 0;
466 			uvcb.dump_area_origin = (u64)buff_kvm;
467 		}
468 	} while (buff_user_len);
469 
470 	/* Report back where we ended dumping */
471 	*gaddr = uvcb.gaddr;
472 
473 	/* Lets only log errors, we don't want to spam */
474 out:
475 	if (ret)
476 		KVM_UV_EVENT(kvm, 3,
477 			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
478 			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
479 	*rc = uvcb.header.rc;
480 	*rrc = uvcb.header.rrc;
481 	vfree(buff_kvm);
482 
483 	return ret;
484 }
485 
486 /**
487  * kvm_s390_pv_dump_complete
488  *
489  * @kvm: pointer to the guest's KVM struct
490  * @buff_user: Userspace pointer where we will write the results to
491  * @rc: Pointer to where the uvcb return code is stored
492  * @rrc: Pointer to where the uvcb return reason code is stored
493  *
494  * Completes the dumping operation and writes the completion data to
495  * user space.
496  *
497  * Context: kvm->lock needs to be held
498  *
499  * Return:
500  *  0 on success
501  *  -ENOMEM if allocating the completion buffer fails
502  *  -EINVAL if the UV call fails, rc and rrc will be set in this case
503  *  -EFAULT if copying the result to buff_user failed
504  */
505 int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
506 			      u16 *rc, u16 *rrc)
507 {
508 	struct uv_cb_dump_complete complete = {
509 		.header.len = sizeof(complete),
510 		.header.cmd = UVC_CMD_DUMP_COMPLETE,
511 		.config_handle = kvm_s390_pv_get_handle(kvm),
512 	};
513 	u64 *compl_data;
514 	int ret;
515 
516 	/* Allocate dump area */
517 	compl_data = vzalloc(uv_info.conf_dump_finalize_len);
518 	if (!compl_data)
519 		return -ENOMEM;
520 	complete.dump_area_origin = (u64)compl_data;
521 
522 	ret = uv_call_sched(0, (u64)&complete);
523 	*rc = complete.header.rc;
524 	*rrc = complete.header.rrc;
525 	KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
526 		     complete.header.rc, complete.header.rrc);
527 
528 	if (!ret) {
529 		/*
530 		 * kvm_s390_pv_dealloc_vm() will also (mem)set
531 		 * this to false on a reboot or other destroy
532 		 * operation for this vm.
533 		 */
534 		kvm->arch.pv.dumping = false;
535 		kvm_s390_vcpu_unblock_all(kvm);
536 		ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
537 		if (ret)
538 			ret = -EFAULT;
539 	}
540 	vfree(compl_data);
541 	/* If the UVC returned an error, translate it to -EINVAL */
542 	if (ret > 0)
543 		ret = -EINVAL;
544 	return ret;
545 }
546