xref: /openbmc/linux/kernel/kexec.c (revision 474be445555ba8f2e776b4b6458c310bc215f76b)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * kexec.c - kexec_load system call
 * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/security.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/syscalls.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>

#include "kexec_internal.h"

static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
			     unsigned long nr_segments,
			     struct kexec_segment *segments,
			     unsigned long flags)
{
	int ret;
	struct kimage *image;
	bool kexec_on_panic = flags & KEXEC_ON_CRASH;

	if (kexec_on_panic) {
		/* Verify we have a valid entry point */
		if ((entry < phys_to_boot_phys(crashk_res.start)) ||
		    (entry > phys_to_boot_phys(crashk_res.end)))
			return -EADDRNOTAVAIL;
	}

	/* Allocate and initialize a controlling structure */
	image = do_kimage_alloc_init();
	if (!image)
		return -ENOMEM;

	image->start = entry;
	image->nr_segments = nr_segments;
	memcpy(image->segment, segments, nr_segments * sizeof(*segments));

	if (kexec_on_panic) {
		/* Enable special crash kernel control page alloc policy. */
		image->control_page = crashk_res.start;
		image->type = KEXEC_TYPE_CRASH;
	}

	ret = sanity_check_segment_list(image);
	if (ret)
		goto out_free_image;

	/*
	 * Find a location for the control code buffer, and add it to
	 * the vector of segments so that its pages will also be
	 * counted as destination pages.
	 */
	ret = -ENOMEM;
	image->control_code_page = kimage_alloc_control_pages(image,
					   get_order(KEXEC_CONTROL_PAGE_SIZE));
	if (!image->control_code_page) {
		pr_err("Could not allocate control_code_buffer\n");
		goto out_free_image;
	}

	if (!kexec_on_panic) {
		image->swap_page = kimage_alloc_control_pages(image, 0);
		if (!image->swap_page) {
			pr_err("Could not allocate swap buffer\n");
			goto out_free_control_pages;
		}
	}

	*rimage = image;
	return 0;
out_free_control_pages:
	kimage_free_page_list(&image->control_pages);
out_free_image:
	kfree(image);
	return ret;
}
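
/*
 * Editor's note (not part of the original file) on the segment
 * semantics kimage_alloc_init() inherits: each struct kexec_segment
 * names a user-space source buffer (buf, bufsz) and a destination
 * range in physical memory (mem, memsz). bufsz bytes are copied and
 * the tail up to memsz is zero-filled, so bufsz must not exceed memsz
 * and mem/memsz must be page-aligned. For example (hypothetical
 * sizes), bufsz = 5000 with memsz = 8192 copies 5000 bytes and zeroes
 * the remaining 3192.
 */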

static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
		struct kexec_segment *segments, unsigned long flags)
{
	struct kimage **dest_image, *image;
	unsigned long i;
	int ret;

	/*
	 * Because we write directly to the reserved memory region when
	 * loading crash kernels, we need serialization here to prevent
	 * multiple crash kernels from attempting to load simultaneously.
	 * Note that we only try to take the lock: a concurrent load
	 * fails fast with -EBUSY instead of blocking.
	 */
	if (!kexec_trylock())
		return -EBUSY;

	if (flags & KEXEC_ON_CRASH) {
		dest_image = &kexec_crash_image;
		if (kexec_crash_image)
			arch_kexec_unprotect_crashkres();
	} else {
		dest_image = &kexec_image;
	}

	if (nr_segments == 0) {
		/* Uninstall image */
		kimage_free(xchg(dest_image, NULL));
		ret = 0;
		goto out_unlock;
	}
	if (flags & KEXEC_ON_CRASH) {
		/*
		 * Loading another kernel to switch to if this one
		 * crashes.  Free any current crash dump kernel before
		 * we corrupt it.
		 */
		kimage_free(xchg(&kexec_crash_image, NULL));
	}

	ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
	if (ret)
		goto out_unlock;

	if (flags & KEXEC_PRESERVE_CONTEXT)
		image->preserve_context = 1;

	ret = machine_kexec_prepare(image);
	if (ret)
		goto out;

	/*
	 * Some architectures (like s390) may touch the crash memory
	 * before machine_kexec_prepare(), so we must copy the
	 * vmcoreinfo data after it.
	 */
	ret = kimage_crash_copy_vmcoreinfo(image);
	if (ret)
		goto out;

	for (i = 0; i < nr_segments; i++) {
		ret = kimage_load_segment(image, &image->segment[i]);
		if (ret)
			goto out;
	}

	kimage_terminate(image);

	ret = machine_kexec_post_load(image);
	if (ret)
		goto out;

	/* Install the new kernel and uninstall the old */
	image = xchg(dest_image, image);

out:
	if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
		arch_kexec_protect_crashkres();

	kimage_free(image);
out_unlock:
	kexec_unlock();
	return ret;
}
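
/*
 * Editor's note (not part of the original file): the nr_segments == 0
 * branch in do_kexec_load() above doubles as the unload path. A
 * minimal sketch of a caller dropping a previously loaded image,
 * assuming the raw syscall interface:
 *
 *	syscall(SYS_kexec_load, 0UL, 0UL, NULL, KEXEC_ARCH_DEFAULT);
 *
 * The same call with KEXEC_ON_CRASH in the flags frees the crash
 * image instead.
 */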

/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down, preventing ongoing DMA and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that includes the syscall number and
 *   then copies the image to its final destination and jumps into
 *   the image at entry.
 *
 * kexec does not sync or unmount filesystems, so if you need that
 * to happen you must do it yourself.
 */
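
/*
 * Editor's sketch (not part of the original file): a minimal userspace
 * caller, assuming the raw syscall interface (glibc provides no
 * wrapper; kexec-tools is the usual client) and 4 KiB pages. The
 * destination and entry addresses are hypothetical; real callers
 * derive them by parsing the image for the target architecture:
 *
 *	#include <linux/kexec.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static long load_image(void *buf, size_t len,
 *			       unsigned long dest, unsigned long entry)
 *	{
 *		struct kexec_segment seg = {
 *			.buf   = buf,			// image data in user memory
 *			.bufsz = len,			// bytes to copy
 *			.mem   = (void *)dest,		// destination physical address
 *			.memsz = (len + 4095) & ~4095UL, // page-aligned total size
 *		};
 *
 *		return syscall(SYS_kexec_load, entry, 1UL, &seg,
 *			       KEXEC_ARCH_DEFAULT);
 *	}
 */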

static inline int kexec_load_check(unsigned long nr_segments,
				   unsigned long flags)
{
	int result;

	/* We only trust the superuser with rebooting the system. */
	if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
		return -EPERM;

	/* Permit LSMs and IMA to fail the kexec */
	result = security_kernel_load_data(LOADING_KEXEC_IMAGE, false);
	if (result < 0)
		return result;

	/*
	 * kexec can be used to circumvent module loading restrictions, so
	 * prevent loading in that case
	 */
	result = security_locked_down(LOCKDOWN_KEXEC);
	if (result)
		return result;

	/*
	 * Verify we have a legal set of flags: every set bit outside
	 * the architecture field must be a recognized KEXEC_* flag.
	 * This leaves us room for future extensions.
	 */
	if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
		return -EINVAL;

	/*
	 * Put an artificial cap on the number of segments passed to
	 * kexec_load.
	 */
	if (nr_segments > KEXEC_SEGMENT_MAX)
		return -EINVAL;

	return 0;
}
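
/*
 * Editor's worked example (not part of the original file) of the flag
 * check above, assuming the usual uapi definitions where the high 16
 * bits (KEXEC_ARCH_MASK) carry the architecture and KEXEC_FLAGS is
 * the set of accepted low bits:
 *
 *	flags = KEXEC_ARCH_X86_64 | KEXEC_ON_CRASH;
 *
 * passes, because stripping the architecture field leaves only
 * recognized flag bits, whereas any undefined low bit makes
 * (flags & ~KEXEC_ARCH_MASK) differ from (flags & KEXEC_FLAGS) and
 * the call fails with -EINVAL.
 */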

SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
		struct kexec_segment __user *, segments, unsigned long, flags)
{
	struct kexec_segment *ksegments;
	unsigned long result;

	result = kexec_load_check(nr_segments, flags);
	if (result)
		return result;

	/* Verify we are on the appropriate architecture */
	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
		((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
		return -EINVAL;

	ksegments = memdup_user(segments, nr_segments * sizeof(ksegments[0]));
	if (IS_ERR(ksegments))
		return PTR_ERR(ksegments);

	result = do_kexec_load(entry, nr_segments, ksegments, flags);
	kfree(ksegments);

	return result;
}

#ifdef CONFIG_COMPAT
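/*
 * Editor's note (not part of the original file): the 32-bit entry
 * point below widens each segment descriptor before reusing the
 * native loader. Assuming the definitions in <linux/kexec.h>, the
 * compat layout is roughly:
 *
 *	struct compat_kexec_segment {
 *		compat_uptr_t  buf;	// 32-bit user pointer
 *		compat_size_t  bufsz;
 *		compat_ulong_t mem;
 *		compat_size_t  memsz;
 *	};
 *
 * compat_ptr() converts the 32-bit pointer into a native
 * void __user *; the integer fields widen implicitly.
 */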
COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
		       compat_ulong_t, nr_segments,
		       struct compat_kexec_segment __user *, segments,
		       compat_ulong_t, flags)
{
	struct compat_kexec_segment in;
	struct kexec_segment *ksegments;
	unsigned long i, result;

	result = kexec_load_check(nr_segments, flags);
	if (result)
		return result;

	/*
	 * Don't allow clients that don't understand the native
	 * architecture to do anything.
	 */
	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
		return -EINVAL;

	ksegments = kmalloc_array(nr_segments, sizeof(ksegments[0]),
			GFP_KERNEL);
	if (!ksegments)
		return -ENOMEM;

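	/* Widen each 32-bit segment descriptor into the native layout. */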
	for (i = 0; i < nr_segments; i++) {
		result = copy_from_user(&in, &segments[i], sizeof(in));
		if (result) {
			/* Return a proper error, not a residual byte count. */
			result = -EFAULT;
			goto fail;
		}

		ksegments[i].buf   = compat_ptr(in.buf);
		ksegments[i].bufsz = in.bufsz;
		ksegments[i].mem   = in.mem;
		ksegments[i].memsz = in.memsz;
	}

	result = do_kexec_load(entry, nr_segments, ksegments, flags);

fail:
	kfree(ksegments);
	return result;
}
#endif