// SPDX-License-Identifier: GPL-2.0
/*
 * Hosting Protected Virtual Machines
 *
 * Copyright IBM Corp. 2019, 2020
 *    Author(s): Janosch Frank <frankja@linux.ibm.com>
 */
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/gmap.h>
#include <asm/uv.h>
#include <asm/mman.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include "kvm-s390.h"

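/*
 * Editor's note: most destroy-style Ultravisor commands below go through
 * uv_cmd_nodata() from <asm/uv.h>, a wrapper for UV commands that carry no
 * payload. As a rough, hedged sketch of that convention (see the header for
 * the authoritative definition), it behaves approximately like:
 *
 *	struct uv_cb_nodata uvcb = {
 *		.header.cmd = cmd,
 *		.header.len = sizeof(uvcb),
 *		.handle = handle,
 *	};
 *	cc = uv_call(0, (u64)&uvcb);
 *	*rc = uvcb.header.rc;
 *	*rrc = uvcb.header.rrc;
 *
 * i.e. the condition code reports overall success or failure, while rc/rrc
 * carry the Ultravisor's detailed return and reason codes for logging.
 */
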
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	int cc;

	if (!kvm_s390_pv_cpu_get_handle(vcpu))
		return 0;

	cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);

	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
		     vcpu->vcpu_id, *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);

	/* Intended memory leak for something that should never happen. */
	if (!cc)
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));

	free_page(sida_origin(vcpu->arch.sie_block));
	vcpu->arch.sie_block->pv_handle_cpu = 0;
	vcpu->arch.sie_block->pv_handle_config = 0;
	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
	vcpu->arch.sie_block->sdf = 0;
	/*
	 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
	 * Use the reset value of gbea to avoid leaking the kernel pointer of
	 * the just freed sida.
	 */
	vcpu->arch.sie_block->gbea = 1;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

	return cc ? -EIO : 0;
}

int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	struct uv_cb_csc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
		.header.len = sizeof(uvcb),
	};
	int cc;

	if (kvm_s390_pv_cpu_get_handle(vcpu))
		return -EINVAL;

	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
						   get_order(uv_info.guest_cpu_stor_len));
	if (!vcpu->arch.pv.stor_base)
		return -ENOMEM;

	/* Input */
	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
	uvcb.num = vcpu->arch.sie_block->icpua;
	uvcb.state_origin = (u64)vcpu->arch.sie_block;
	uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;

	/* Alloc Secure Instruction Data Area Designation */
	vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!vcpu->arch.sie_block->sidad) {
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));
		return -ENOMEM;
	}

	cc = uv_call(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(vcpu->kvm, 3,
		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
		     uvcb.header.rrc);

	if (cc) {
		u16 dummy;

		kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
		return -EIO;
	}

	/* Output */
	vcpu->arch.pv.handle = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
	vcpu->arch.sie_block->sdf = 2;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	return 0;
}

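/*
 * Editor's note: a minimal sketch of how a caller might drive the vCPU
 * transition above for a whole VM. The loop shape is illustrative only
 * (the actual transition logic lives in kvm-s390.c) and the error
 * handling is simplified:
 *
 *	unsigned long i;
 *	struct kvm_vcpu *vcpu;
 *
 *	kvm_for_each_vcpu(i, vcpu, kvm) {
 *		mutex_lock(&vcpu->mutex);
 *		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
 *		mutex_unlock(&vcpu->mutex);
 *		if (r)
 *			break;
 *	}
 */
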
/* only free resources when the destroy was successful */
static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
{
	vfree(kvm->arch.pv.stor_var);
	free_pages(kvm->arch.pv.stor_base,
		   get_order(uv_info.guest_base_stor_len));
	memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv));
}

static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
{
	unsigned long base = uv_info.guest_base_stor_len;
	unsigned long virt = uv_info.guest_virt_var_stor_len;
	unsigned long npages = 0, vlen = 0;

	kvm->arch.pv.stor_var = NULL;
	kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
	if (!kvm->arch.pv.stor_base)
		return -ENOMEM;

	/*
	 * Calculate current guest storage for allocation of the
	 * variable storage, which is based on the length in MB
	 * (see the worked example after this function).
	 *
	 * Slots are sorted by GFN
	 */
	mutex_lock(&kvm->slots_lock);
	npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
	mutex_unlock(&kvm->slots_lock);

	kvm->arch.pv.guest_len = npages * PAGE_SIZE;

	/* Allocate variable storage */
	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
	vlen += uv_info.guest_virt_base_stor_len;
	kvm->arch.pv.stor_var = vzalloc(vlen);
	if (!kvm->arch.pv.stor_var)
		goto out_err;
	return 0;

out_err:
	kvm_s390_pv_dealloc_vm(kvm);
	return -ENOMEM;
}

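/*
 * Editor's note: worked example for the variable-storage sizing above,
 * with illustrative numbers (guest_virt_var_stor_len is machine
 * dependent): a guest with 4 GiB of storage has npages * PAGE_SIZE =
 * 4 GiB, i.e. 4096 HPAGE_SIZE (1 MiB) chunks. Assuming, hypothetically,
 * virt == 512 bytes per chunk:
 *
 *	vlen = ALIGN(512 * 4096, PAGE_SIZE) + guest_virt_base_stor_len
 *	     = 2 MiB + guest_virt_base_stor_len
 */
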
/* this should not fail, but if it does, we must not free the donated memory */
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	int cc;

	/*
	 * if the mm still has a mapping, make all its pages accessible
	 * before destroying the guest
	 */
	if (mmget_not_zero(kvm->mm)) {
		s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
		mmput(kvm->mm);
	}

	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
	atomic_set(&kvm->mm->context.is_protected, 0);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
	/* Intended memory leak on "impossible" error */
	if (!cc) {
		kvm_s390_pv_dealloc_vm(kvm);
		return 0;
	}
	s390_replace_asce(kvm->arch.gmap);
	return -EIO;
}

int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_cgc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
		.header.len = sizeof(uvcb)
	};
	int cc, ret;
	u16 dummy;

	ret = kvm_s390_pv_alloc_vm(kvm);
	if (ret)
		return ret;

	/* Inputs */
	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
	uvcb.guest_asce = kvm->arch.gmap->asce;
	uvcb.guest_sca = (unsigned long)kvm->arch.sca;
	uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);

	/* Outputs */
	kvm->arch.pv.handle = uvcb.guest_handle;

	if (cc) {
		if (uvcb.header.rc & UVC_RC_NEED_DESTROY)
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
		else
			kvm_s390_pv_dealloc_vm(kvm);
		return -EIO;
	}
	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
	return 0;
}

int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
			      u16 *rrc)
{
	struct uv_cb_ssc uvcb = {
		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
		.header.len = sizeof(uvcb),
		.sec_header_origin = (u64)hdr,
		.sec_header_len = length,
		.guest_handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc = uv_call(0, (u64)&uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
		     *rc, *rrc);
	if (!cc)
		atomic_set(&kvm->mm->context.is_protected, 1);
	return cc ? -EINVAL : 0;
}

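/*
 * Editor's note: the SE header handed to kvm_s390_pv_set_sec_parms()
 * originates from the protected-VM image. A hedged sketch of the
 * corresponding userspace call, with hypothetical hdr_buf/hdr_len taken
 * from that image:
 *
 *	struct kvm_s390_pv_sec_parm parm = {
 *		.origin = (__u64)hdr_buf,
 *		.length = hdr_len,
 *	};
 *	struct kvm_s390_pv_cmd cmd = {
 *		.cmd = KVM_PV_SET_SEC_PARMS,
 *		.data = (__u64)&parm,
 *	};
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 */
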
static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
		      u64 offset, u16 *rc, u16 *rrc)
{
	struct uv_cb_unp uvcb = {
		.header.cmd = UVC_CMD_UNPACK_IMG,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = addr,
		.tweak[0] = tweak,
		.tweak[1] = offset,
	};
	int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;

	if (ret && ret != -EAGAIN)
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
			     uvcb.gaddr, *rc, *rrc);
	return ret;
}

int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
		       unsigned long tweak, u16 *rc, u16 *rrc)
{
	u64 offset = 0;
	int ret = 0;

	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
		return -EINVAL;

	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
		     addr, size);

	while (offset < size) {
		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
		if (ret == -EAGAIN) {
			cond_resched();
			if (fatal_signal_pending(current))
				break;
			continue;
		}
		if (ret)
			break;
		addr += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	if (!ret)
		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
	return ret;
}

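/*
 * Editor's note: for context, userspace reaches kvm_s390_pv_unpack()
 * through the KVM_S390_PV_COMMAND ioctl. A hedged sketch with
 * hypothetical values (addr/size/tweak come from the PV image header):
 *
 *	struct kvm_s390_pv_unp unp = {
 *		.addr = image_addr,
 *		.size = image_size,
 *		.tweak = image_tweak,
 *	};
 *	struct kvm_s390_pv_cmd cmd = {
 *		.cmd = KVM_PV_UNPACK,
 *		.data = (__u64)&unp,
 *	};
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 */
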
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
{
	struct uv_cb_cpu_set_state uvcb = {
		.header.cmd	= UVC_CMD_CPU_SET_STATE,
		.header.len	= sizeof(uvcb),
		.cpu_handle	= kvm_s390_pv_cpu_get_handle(vcpu),
		.state		= state,
	};
	int cc;

	cc = uv_call(0, (u64)&uvcb);
	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
	if (cc)
		return -EINVAL;
	return 0;
}

int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_cpu uvcb = {
		.header.cmd = UVC_CMD_DUMP_CPU,
		.header.len = sizeof(uvcb),
		.cpu_handle = vcpu->arch.pv.handle,
		.dump_area_origin = (u64)buff,
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	return cc;
}

/* Size of the cache for the storage state dump data. 1MB for now */
#define DUMP_BUFF_LEN HPAGE_SIZE

/**
 * kvm_s390_pv_dump_stor_state - Dump guest storage state data
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @gaddr: Starting absolute guest address for which the storage state
 *	   is requested.
 * @buff_user_len: Length of the buff_user buffer
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Stores buff_user_len bytes of tweak component values to buff_user
 * starting with the 1MB block specified by the absolute guest address
 * (gaddr). The gaddr pointer will be updated with the last address
 * for which data was written when returning to userspace. buff_user
 * might be written to even if an error rc is returned, for instance
 * if we encounter a fault after writing the first page of data.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the cache fails
 *  -EINVAL if gaddr is not aligned to 1MB
 *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_stor_state uvcb = {
		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
		.header.len = sizeof(uvcb),
		.config_handle = kvm->arch.pv.handle,
		.gaddr = *gaddr,
		.dump_area_origin = 0,
	};
	const u64 increment_len = uv_info.conf_dump_storage_state_len;
	size_t buff_kvm_size;
	size_t size_done = 0;
	u8 *buff_kvm = NULL;
	int cc, ret;

	ret = -EINVAL;
	/* UV call processes 1MB guest storage chunks at a time */
	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
		goto out;

	/*
	 * We provide the storage state for 1MB chunks of guest
	 * storage. The buffer will need to be aligned to
	 * conf_dump_storage_state_len so we don't end on a partial
	 * chunk.
	 */
	if (!buff_user_len ||
	    !IS_ALIGNED(buff_user_len, increment_len))
		goto out;

	/*
	 * Allocate a buffer from which we will later copy to the user
	 * process. We don't want userspace to dictate our buffer size
	 * so we limit it to DUMP_BUFF_LEN.
	 */
	ret = -ENOMEM;
	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
	buff_kvm = vzalloc(buff_kvm_size);
	if (!buff_kvm)
		goto out;

	ret = 0;
	uvcb.dump_area_origin = (u64)buff_kvm;
	/* We will loop until the user buffer is filled or an error occurs */
	do {
		/* Get 1MB worth of guest storage state data */
		cc = uv_call_sched(0, (u64)&uvcb);

		/* All or nothing */
		if (cc) {
			ret = -EINVAL;
			break;
		}

		size_done += increment_len;
		uvcb.dump_area_origin += increment_len;
		buff_user_len -= increment_len;
		uvcb.gaddr += HPAGE_SIZE;

		/* KVM Buffer full, time to copy to the process */
		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
			if (copy_to_user(buff_user, buff_kvm, size_done)) {
				ret = -EFAULT;
				break;
			}

			buff_user += size_done;
			size_done = 0;
			uvcb.dump_area_origin = (u64)buff_kvm;
		}
	} while (buff_user_len);

	/* Report back where we ended dumping */
	*gaddr = uvcb.gaddr;

	/* Let's only log errors, we don't want to spam */
out:
	if (ret)
		KVM_UV_EVENT(kvm, 3,
			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	vfree(buff_kvm);

	return ret;
}

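/*
 * Editor's note: back-of-the-envelope sizing for the cache above,
 * assuming, hypothetically, conf_dump_storage_state_len == 64 bytes of
 * state per 1 MiB block: the 1 MiB DUMP_BUFF_LEN cache then holds
 * 1 MiB / 64 = 16384 entries, so each copy_to_user() round covers
 * 16384 * 1 MiB = 16 GiB of guest storage.
 */
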
/**
 * kvm_s390_pv_dump_complete - Complete a previously started dump
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Completes the dumping operation and writes the completion data to
 * user space.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the completion buffer fails
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
			      u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_complete complete = {
		.header.len = sizeof(complete),
		.header.cmd = UVC_CMD_DUMP_COMPLETE,
		.config_handle = kvm_s390_pv_get_handle(kvm),
	};
	u64 *compl_data;
	int ret;

	/* Allocate dump area */
	compl_data = vzalloc(uv_info.conf_dump_finalize_len);
	if (!compl_data)
		return -ENOMEM;
	complete.dump_area_origin = (u64)compl_data;

	ret = uv_call_sched(0, (u64)&complete);
	*rc = complete.header.rc;
	*rrc = complete.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
		     complete.header.rc, complete.header.rrc);

	if (!ret) {
		/*
		 * kvm_s390_pv_dealloc_vm() will also (mem)set
		 * this to false on a reboot or other destroy
		 * operation for this vm.
		 */
		kvm->arch.pv.dumping = false;
		kvm_s390_vcpu_unblock_all(kvm);
		ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
		if (ret)
			ret = -EFAULT;
	}
	vfree(compl_data);
	/* If the UVC returned an error, translate it to -EINVAL */
	if (ret > 0)
		ret = -EINVAL;
	return ret;
}
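
/*
 * Editor's note: userspace side of the completion step, sketched with
 * the KVM_S390_PV_COMMAND ioctl and the KVM_PV_DUMP/KVM_PV_DUMP_COMPLETE
 * subcommand (hedged; compl_buf must be uv_info.conf_dump_finalize_len
 * bytes, which userspace learns through the UV interfaces):
 *
 *	struct kvm_s390_pv_dmp dmp = {
 *		.subcmd = KVM_PV_DUMP_COMPLETE,
 *		.buff_addr = (__u64)compl_buf,
 *		.buff_len = conf_dump_finalize_len,
 *	};
 *	struct kvm_s390_pv_cmd cmd = {
 *		.cmd = KVM_PV_DUMP,
 *		.data = (__u64)&dmp,
 *	};
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 */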