xref: /openbmc/linux/init/initramfs.c (revision fcb7aedd)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/init.h>
3 #include <linux/async.h>
4 #include <linux/fs.h>
5 #include <linux/slab.h>
6 #include <linux/types.h>
7 #include <linux/fcntl.h>
8 #include <linux/delay.h>
9 #include <linux/string.h>
10 #include <linux/dirent.h>
11 #include <linux/syscalls.h>
12 #include <linux/utime.h>
13 #include <linux/file.h>
14 #include <linux/memblock.h>
15 #include <linux/mm.h>
16 #include <linux/namei.h>
17 #include <linux/init_syscalls.h>
18 #include <linux/umh.h>
19 
20 static ssize_t __init xwrite(struct file *file, const char *p, size_t count,
21 		loff_t *pos)
22 {
23 	ssize_t out = 0;
24 
25 	/* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */
26 	while (count) {
27 		ssize_t rv = kernel_write(file, p, count, pos);
28 
29 		if (rv < 0) {
30 			if (rv == -EINTR || rv == -EAGAIN)
31 				continue;
32 			return out ? out : rv;
33 		} else if (rv == 0)
34 			break;
35 
36 		p += rv;
37 		out += rv;
38 		count -= rv;
39 	}
40 
41 	return out;
42 }
43 
44 static __initdata char *message;
45 static void __init error(char *x)
46 {
47 	if (!message)
48 		message = x;
49 }
50 
51 static void panic_show_mem(const char *fmt, ...)
52 {
53 	va_list args;
54 
55 	show_mem(0, NULL);
56 	va_start(args, fmt);
57 	panic(fmt, args);
58 	va_end(args);
59 }
60 
61 /* link hash */
62 
63 #define N_ALIGN(len) ((((len) + 1) & ~3) + 2)
64 
65 static __initdata struct hash {
66 	int ino, minor, major;
67 	umode_t mode;
68 	struct hash *next;
69 	char name[N_ALIGN(PATH_MAX)];
70 } *head[32];
71 
/*
 * Map a (major, minor, ino) triple to a bucket index in the range 0..31.
 */
static inline int hash(int major, int minor, int ino)
{
	unsigned long sum = ino + minor + (major << 3);

	return (sum + (sum >> 5)) & 31;
}
78 
79 static char __init *find_link(int major, int minor, int ino,
80 			      umode_t mode, char *name)
81 {
82 	struct hash **p, *q;
83 	for (p = head + hash(major, minor, ino); *p; p = &(*p)->next) {
84 		if ((*p)->ino != ino)
85 			continue;
86 		if ((*p)->minor != minor)
87 			continue;
88 		if ((*p)->major != major)
89 			continue;
90 		if (((*p)->mode ^ mode) & S_IFMT)
91 			continue;
92 		return (*p)->name;
93 	}
94 	q = kmalloc(sizeof(struct hash), GFP_KERNEL);
95 	if (!q)
96 		panic_show_mem("can't allocate link hash entry");
97 	q->major = major;
98 	q->minor = minor;
99 	q->ino = ino;
100 	q->mode = mode;
101 	strcpy(q->name, name);
102 	q->next = NULL;
103 	*p = q;
104 	return NULL;
105 }
106 
107 static void __init free_hash(void)
108 {
109 	struct hash **p, *q;
110 	for (p = head; p < head + 32; p++) {
111 		while (*p) {
112 			q = *p;
113 			*p = q->next;
114 			kfree(q);
115 		}
116 	}
117 }
118 
119 static long __init do_utime(char *filename, time64_t mtime)
120 {
121 	struct timespec64 t[2];
122 
123 	t[0].tv_sec = mtime;
124 	t[0].tv_nsec = 0;
125 	t[1].tv_sec = mtime;
126 	t[1].tv_nsec = 0;
127 	return init_utimes(filename, t);
128 }
129 
130 static __initdata LIST_HEAD(dir_list);
131 struct dir_entry {
132 	struct list_head list;
133 	time64_t mtime;
134 	char name[];
135 };
136 
137 static void __init dir_add(const char *name, time64_t mtime)
138 {
139 	size_t nlen = strlen(name) + 1;
140 	struct dir_entry *de;
141 
142 	de = kmalloc(sizeof(struct dir_entry) + nlen, GFP_KERNEL);
143 	if (!de)
144 		panic_show_mem("can't allocate dir_entry buffer");
145 	INIT_LIST_HEAD(&de->list);
146 	strscpy(de->name, name, nlen);
147 	de->mtime = mtime;
148 	list_add(&de->list, &dir_list);
149 }
150 
151 static void __init dir_utime(void)
152 {
153 	struct dir_entry *de, *tmp;
154 	list_for_each_entry_safe(de, tmp, &dir_list, list) {
155 		list_del(&de->list);
156 		do_utime(de->name, de->mtime);
157 		kfree(de);
158 	}
159 }
160 
161 static __initdata time64_t mtime;
162 
163 /* cpio header parsing */
164 
165 static __initdata unsigned long ino, major, minor, nlink;
166 static __initdata umode_t mode;
167 static __initdata unsigned long body_len, name_len;
168 static __initdata uid_t uid;
169 static __initdata gid_t gid;
170 static __initdata unsigned rdev;
171 
172 static void __init parse_header(char *s)
173 {
174 	unsigned long parsed[12];
175 	char buf[9];
176 	int i;
177 
178 	buf[8] = '\0';
179 	for (i = 0, s += 6; i < 12; i++, s += 8) {
180 		memcpy(buf, s, 8);
181 		parsed[i] = simple_strtoul(buf, NULL, 16);
182 	}
183 	ino = parsed[0];
184 	mode = parsed[1];
185 	uid = parsed[2];
186 	gid = parsed[3];
187 	nlink = parsed[4];
188 	mtime = parsed[5]; /* breaks in y2106 */
189 	body_len = parsed[6];
190 	major = parsed[7];
191 	minor = parsed[8];
192 	rdev = new_encode_dev(MKDEV(parsed[9], parsed[10]));
193 	name_len = parsed[11];
194 }
195 
196 /* FSM */
197 
198 static __initdata enum state {
199 	Start,
200 	Collect,
201 	GotHeader,
202 	SkipIt,
203 	GotName,
204 	CopyFile,
205 	GotSymlink,
206 	Reset
207 } state, next_state;
208 
209 static __initdata char *victim;
210 static unsigned long byte_count __initdata;
211 static __initdata loff_t this_header, next_header;
212 
213 static inline void __init eat(unsigned n)
214 {
215 	victim += n;
216 	this_header += n;
217 	byte_count -= n;
218 }
219 
220 static __initdata char *collected;
221 static long remains __initdata;
222 static __initdata char *collect;
223 
224 static void __init read_into(char *buf, unsigned size, enum state next)
225 {
226 	if (byte_count >= size) {
227 		collected = victim;
228 		eat(size);
229 		state = next;
230 	} else {
231 		collect = collected = buf;
232 		remains = size;
233 		next_state = next;
234 		state = Collect;
235 	}
236 }
237 
238 static __initdata char *header_buf, *symlink_buf, *name_buf;
239 
240 static int __init do_start(void)
241 {
242 	read_into(header_buf, 110, GotHeader);
243 	return 0;
244 }
245 
246 static int __init do_collect(void)
247 {
248 	unsigned long n = remains;
249 	if (byte_count < n)
250 		n = byte_count;
251 	memcpy(collect, victim, n);
252 	eat(n);
253 	collect += n;
254 	if ((remains -= n) != 0)
255 		return 1;
256 	state = next_state;
257 	return 0;
258 }
259 
260 static int __init do_header(void)
261 {
262 	if (memcmp(collected, "070701", 6)) {
263 		if (memcmp(collected, "070707", 6) == 0)
264 			error("incorrect cpio method used: use -H newc option");
265 		else
266 			error("no cpio magic");
267 		return 1;
268 	}
269 	parse_header(collected);
270 	next_header = this_header + N_ALIGN(name_len) + body_len;
271 	next_header = (next_header + 3) & ~3;
272 	state = SkipIt;
273 	if (name_len <= 0 || name_len > PATH_MAX)
274 		return 0;
275 	if (S_ISLNK(mode)) {
276 		if (body_len > PATH_MAX)
277 			return 0;
278 		collect = collected = symlink_buf;
279 		remains = N_ALIGN(name_len) + body_len;
280 		next_state = GotSymlink;
281 		state = Collect;
282 		return 0;
283 	}
284 	if (S_ISREG(mode) || !body_len)
285 		read_into(name_buf, N_ALIGN(name_len), GotName);
286 	return 0;
287 }
288 
289 static int __init do_skip(void)
290 {
291 	if (this_header + byte_count < next_header) {
292 		eat(byte_count);
293 		return 1;
294 	} else {
295 		eat(next_header - this_header);
296 		state = next_state;
297 		return 0;
298 	}
299 }
300 
301 static int __init do_reset(void)
302 {
303 	while (byte_count && *victim == '\0')
304 		eat(1);
305 	if (byte_count && (this_header & 3))
306 		error("broken padding");
307 	return 1;
308 }
309 
310 static void __init clean_path(char *path, umode_t fmode)
311 {
312 	struct kstat st;
313 
314 	if (!init_stat(path, &st, AT_SYMLINK_NOFOLLOW) &&
315 	    (st.mode ^ fmode) & S_IFMT) {
316 		if (S_ISDIR(st.mode))
317 			init_rmdir(path);
318 		else
319 			init_unlink(path);
320 	}
321 }
322 
323 static int __init maybe_link(void)
324 {
325 	if (nlink >= 2) {
326 		char *old = find_link(major, minor, ino, mode, collected);
327 		if (old) {
328 			clean_path(collected, 0);
329 			return (init_link(old, collected) < 0) ? -1 : 1;
330 		}
331 	}
332 	return 0;
333 }
334 
335 static __initdata struct file *wfile;
336 static __initdata loff_t wfile_pos;
337 
338 static int __init do_name(void)
339 {
340 	state = SkipIt;
341 	next_state = Reset;
342 	if (strcmp(collected, "TRAILER!!!") == 0) {
343 		free_hash();
344 		return 0;
345 	}
346 	clean_path(collected, mode);
347 	if (S_ISREG(mode)) {
348 		int ml = maybe_link();
349 		if (ml >= 0) {
350 			int openflags = O_WRONLY|O_CREAT;
351 			if (ml != 1)
352 				openflags |= O_TRUNC;
353 			wfile = filp_open(collected, openflags, mode);
354 			if (IS_ERR(wfile))
355 				return 0;
356 			wfile_pos = 0;
357 
358 			vfs_fchown(wfile, uid, gid);
359 			vfs_fchmod(wfile, mode);
360 			if (body_len)
361 				vfs_truncate(&wfile->f_path, body_len);
362 			state = CopyFile;
363 		}
364 	} else if (S_ISDIR(mode)) {
365 		init_mkdir(collected, mode);
366 		init_chown(collected, uid, gid, 0);
367 		init_chmod(collected, mode);
368 		dir_add(collected, mtime);
369 	} else if (S_ISBLK(mode) || S_ISCHR(mode) ||
370 		   S_ISFIFO(mode) || S_ISSOCK(mode)) {
371 		if (maybe_link() == 0) {
372 			init_mknod(collected, mode, rdev);
373 			init_chown(collected, uid, gid, 0);
374 			init_chmod(collected, mode);
375 			do_utime(collected, mtime);
376 		}
377 	}
378 	return 0;
379 }
380 
381 static int __init do_copy(void)
382 {
383 	if (byte_count >= body_len) {
384 		struct timespec64 t[2] = { };
385 		if (xwrite(wfile, victim, body_len, &wfile_pos) != body_len)
386 			error("write error");
387 
388 		t[0].tv_sec = mtime;
389 		t[1].tv_sec = mtime;
390 		vfs_utimes(&wfile->f_path, t);
391 
392 		fput(wfile);
393 		eat(body_len);
394 		state = SkipIt;
395 		return 0;
396 	} else {
397 		if (xwrite(wfile, victim, byte_count, &wfile_pos) != byte_count)
398 			error("write error");
399 		body_len -= byte_count;
400 		eat(byte_count);
401 		return 1;
402 	}
403 }
404 
405 static int __init do_symlink(void)
406 {
407 	collected[N_ALIGN(name_len) + body_len] = '\0';
408 	clean_path(collected, 0);
409 	init_symlink(collected + N_ALIGN(name_len), collected);
410 	init_chown(collected, uid, gid, AT_SYMLINK_NOFOLLOW);
411 	do_utime(collected, mtime);
412 	state = SkipIt;
413 	next_state = Reset;
414 	return 0;
415 }
416 
417 static __initdata int (*actions[])(void) = {
418 	[Start]		= do_start,
419 	[Collect]	= do_collect,
420 	[GotHeader]	= do_header,
421 	[SkipIt]	= do_skip,
422 	[GotName]	= do_name,
423 	[CopyFile]	= do_copy,
424 	[GotSymlink]	= do_symlink,
425 	[Reset]		= do_reset,
426 };
427 
428 static long __init write_buffer(char *buf, unsigned long len)
429 {
430 	byte_count = len;
431 	victim = buf;
432 
433 	while (!actions[state]())
434 		;
435 	return len - byte_count;
436 }
437 
438 static long __init flush_buffer(void *bufv, unsigned long len)
439 {
440 	char *buf = (char *) bufv;
441 	long written;
442 	long origLen = len;
443 	if (message)
444 		return -1;
445 	while ((written = write_buffer(buf, len)) < len && !message) {
446 		char c = buf[written];
447 		if (c == '0') {
448 			buf += written;
449 			len -= written;
450 			state = Start;
451 		} else if (c == 0) {
452 			buf += written;
453 			len -= written;
454 			state = Reset;
455 		} else
456 			error("junk within compressed archive");
457 	}
458 	return origLen;
459 }
460 
461 static unsigned long my_inptr; /* index of next byte to be processed in inbuf */
462 
463 #include <linux/decompress/generic.h>
464 
465 static char * __init unpack_to_rootfs(char *buf, unsigned long len)
466 {
467 	long written;
468 	decompress_fn decompress;
469 	const char *compress_name;
470 	static __initdata char msg_buf[64];
471 
472 	header_buf = kmalloc(110, GFP_KERNEL);
473 	symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL);
474 	name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL);
475 
476 	if (!header_buf || !symlink_buf || !name_buf)
477 		panic_show_mem("can't allocate buffers");
478 
479 	state = Start;
480 	this_header = 0;
481 	message = NULL;
482 	while (!message && len) {
483 		loff_t saved_offset = this_header;
484 		if (*buf == '0' && !(this_header & 3)) {
485 			state = Start;
486 			written = write_buffer(buf, len);
487 			buf += written;
488 			len -= written;
489 			continue;
490 		}
491 		if (!*buf) {
492 			buf++;
493 			len--;
494 			this_header++;
495 			continue;
496 		}
497 		this_header = 0;
498 		decompress = decompress_method(buf, len, &compress_name);
499 		pr_debug("Detected %s compressed data\n", compress_name);
500 		if (decompress) {
501 			int res = decompress(buf, len, NULL, flush_buffer, NULL,
502 				   &my_inptr, error);
503 			if (res)
504 				error("decompressor failed");
505 		} else if (compress_name) {
506 			if (!message) {
507 				snprintf(msg_buf, sizeof msg_buf,
508 					 "compression method %s not configured",
509 					 compress_name);
510 				message = msg_buf;
511 			}
512 		} else
513 			error("invalid magic at start of compressed archive");
514 		if (state != Reset)
515 			error("junk at the end of compressed archive");
516 		this_header = saved_offset + my_inptr;
517 		buf += my_inptr;
518 		len -= my_inptr;
519 	}
520 	dir_utime();
521 	kfree(name_buf);
522 	kfree(symlink_buf);
523 	kfree(header_buf);
524 	return message;
525 }
526 
527 static int __initdata do_retain_initrd;
528 
529 static int __init retain_initrd_param(char *str)
530 {
531 	if (*str)
532 		return 0;
533 	do_retain_initrd = 1;
534 	return 1;
535 }
536 __setup("retain_initrd", retain_initrd_param);
537 
#ifdef CONFIG_ARCH_HAS_KEEPINITRD
/*
 * "keepinitrd" boot parameter: sets do_retain_initrd regardless of any text
 * following the parameter name.  Always reports the parameter as handled.
 */
static int __init keepinitrd_setup(char *__unused)
{
	do_retain_initrd = 1;

	return 1;
}
__setup("keepinitrd", keepinitrd_setup);
#endif
546 
547 static bool __initdata initramfs_async = true;
548 static int __init initramfs_async_setup(char *str)
549 {
550 	strtobool(str, &initramfs_async);
551 	return 1;
552 }
553 __setup("initramfs_async=", initramfs_async_setup);
554 
555 extern char __initramfs_start[];
556 extern unsigned long __initramfs_size;
557 #include <linux/initrd.h>
558 #include <linux/kexec.h>
559 
560 void __init reserve_initrd_mem(void)
561 {
562 	phys_addr_t start;
563 	unsigned long size;
564 
565 	/* Ignore the virtul address computed during device tree parsing */
566 	initrd_start = initrd_end = 0;
567 
568 	if (!phys_initrd_size)
569 		return;
570 	/*
571 	 * Round the memory region to page boundaries as per free_initrd_mem()
572 	 * This allows us to detect whether the pages overlapping the initrd
573 	 * are in use, but more importantly, reserves the entire set of pages
574 	 * as we don't want these pages allocated for other purposes.
575 	 */
576 	start = round_down(phys_initrd_start, PAGE_SIZE);
577 	size = phys_initrd_size + (phys_initrd_start - start);
578 	size = round_up(size, PAGE_SIZE);
579 
580 	if (!memblock_is_region_memory(start, size)) {
581 		pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region",
582 		       (u64)start, size);
583 		goto disable;
584 	}
585 
586 	if (memblock_is_region_reserved(start, size)) {
587 		pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region\n",
588 		       (u64)start, size);
589 		goto disable;
590 	}
591 
592 	memblock_reserve(start, size);
593 	/* Now convert initrd to virtual addresses */
594 	initrd_start = (unsigned long)__va(phys_initrd_start);
595 	initrd_end = initrd_start + phys_initrd_size;
596 	initrd_below_start_ok = 1;
597 
598 	return;
599 disable:
600 	pr_cont(" - disabling initrd\n");
601 	initrd_start = 0;
602 	initrd_end = 0;
603 }
604 
605 void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
606 {
607 #ifdef CONFIG_ARCH_KEEP_MEMBLOCK
608 	unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE);
609 	unsigned long aligned_end = ALIGN(end, PAGE_SIZE);
610 
611 	memblock_free((void *)aligned_start, aligned_end - aligned_start);
612 #endif
613 
614 	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
615 			"initrd");
616 }
617 
618 #ifdef CONFIG_KEXEC_CORE
619 static bool __init kexec_free_initrd(void)
620 {
621 	unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
622 	unsigned long crashk_end   = (unsigned long)__va(crashk_res.end);
623 
624 	/*
625 	 * If the initrd region is overlapped with crashkernel reserved region,
626 	 * free only memory that is not part of crashkernel region.
627 	 */
628 	if (initrd_start >= crashk_end || initrd_end <= crashk_start)
629 		return false;
630 
631 	/*
632 	 * Initialize initrd memory region since the kexec boot does not do.
633 	 */
634 	memset((void *)initrd_start, 0, initrd_end - initrd_start);
635 	if (initrd_start < crashk_start)
636 		free_initrd_mem(initrd_start, crashk_start);
637 	if (initrd_end > crashk_end)
638 		free_initrd_mem(crashk_end, initrd_end);
639 	return true;
640 }
641 #else
642 static inline bool kexec_free_initrd(void)
643 {
644 	return false;
645 }
646 #endif /* CONFIG_KEXEC_CORE */
647 
#ifdef CONFIG_BLK_DEV_RAM
/*
 * Fallback used when the external image could not be unpacked as an
 * initramfs (@err is the reason): unpack the built-in initramfs again and
 * write the raw image to /initrd.image.  Gives up silently if that file
 * cannot be opened; a short write is logged.
 */
static void __init populate_initrd_image(char *err)
{
	unsigned long size = initrd_end - initrd_start;
	struct file *image;
	ssize_t done;
	loff_t pos = 0;

	unpack_to_rootfs(__initramfs_start, __initramfs_size);

	pr_info("rootfs image is not initramfs (%s); looks like an initrd\n",
		err);
	image = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700);
	if (IS_ERR(image))
		return;

	done = xwrite(image, (char *)initrd_start, size, &pos);
	if (done != size)
		pr_err("/initrd.image: incomplete write (%zd != %ld)\n",
		       done, size);
	fput(image);
}
#endif /* CONFIG_BLK_DEV_RAM */
671 
672 static void __init do_populate_rootfs(void *unused, async_cookie_t cookie)
673 {
674 	/* Load the built in initramfs */
675 	char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
676 	if (err)
677 		panic_show_mem("%s", err); /* Failed to decompress INTERNAL initramfs */
678 
679 	if (!initrd_start || IS_ENABLED(CONFIG_INITRAMFS_FORCE))
680 		goto done;
681 
682 	if (IS_ENABLED(CONFIG_BLK_DEV_RAM))
683 		printk(KERN_INFO "Trying to unpack rootfs image as initramfs...\n");
684 	else
685 		printk(KERN_INFO "Unpacking initramfs...\n");
686 
687 	err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start);
688 	if (err) {
689 #ifdef CONFIG_BLK_DEV_RAM
690 		populate_initrd_image(err);
691 #else
692 		printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
693 #endif
694 	}
695 
696 done:
697 	/*
698 	 * If the initrd region is overlapped with crashkernel reserved region,
699 	 * free only memory that is not part of crashkernel region.
700 	 */
701 	if (!do_retain_initrd && initrd_start && !kexec_free_initrd())
702 		free_initrd_mem(initrd_start, initrd_end);
703 	initrd_start = 0;
704 	initrd_end = 0;
705 
706 	flush_delayed_fput();
707 }
708 
709 static ASYNC_DOMAIN_EXCLUSIVE(initramfs_domain);
710 static async_cookie_t initramfs_cookie;
711 
712 void wait_for_initramfs(void)
713 {
714 	if (!initramfs_cookie) {
715 		/*
716 		 * Something before rootfs_initcall wants to access
717 		 * the filesystem/initramfs. Probably a bug. Make a
718 		 * note, avoid deadlocking the machine, and let the
719 		 * caller's access fail as it used to.
720 		 */
721 		pr_warn_once("wait_for_initramfs() called before rootfs_initcalls\n");
722 		return;
723 	}
724 	async_synchronize_cookie_domain(initramfs_cookie + 1, &initramfs_domain);
725 }
726 EXPORT_SYMBOL_GPL(wait_for_initramfs);
727 
728 static int __init populate_rootfs(void)
729 {
730 	initramfs_cookie = async_schedule_domain(do_populate_rootfs, NULL,
731 						 &initramfs_domain);
732 	usermodehelper_enable();
733 	if (!initramfs_async)
734 		wait_for_initramfs();
735 	return 0;
736 }
737 rootfs_initcall(populate_rootfs);
738