xref: /openbmc/linux/kernel/printk/printk.c (revision 48ca54e3)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  linux/kernel/printk.c
4  *
5  *  Copyright (C) 1991, 1992  Linus Torvalds
6  *
7  * Modified to make sys_syslog() more flexible: added commands to
8  * return the last 4k of kernel messages, regardless of whether
9  * they've been read or not.  Added option to suppress kernel printk's
10  * to the console.  Added hook for sending the console messages
11  * elsewhere, in preparation for a serial line console (someday).
12  * Ted Ts'o, 2/11/93.
13  * Modified for sysctl support, 1/8/97, Chris Horn.
14  * Fixed SMP synchronization, 08/08/99, Manfred Spraul
15  *     manfred@colorfullife.com
16  * Rewrote bits to get rid of console_lock
17  *	01Mar01 Andrew Morton
18  */
19 
20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 
22 #include <linux/kernel.h>
23 #include <linux/mm.h>
24 #include <linux/tty.h>
25 #include <linux/tty_driver.h>
26 #include <linux/console.h>
27 #include <linux/init.h>
28 #include <linux/jiffies.h>
29 #include <linux/nmi.h>
30 #include <linux/module.h>
31 #include <linux/moduleparam.h>
32 #include <linux/delay.h>
33 #include <linux/smp.h>
34 #include <linux/security.h>
35 #include <linux/memblock.h>
36 #include <linux/syscalls.h>
37 #include <linux/crash_core.h>
38 #include <linux/ratelimit.h>
39 #include <linux/kmsg_dump.h>
40 #include <linux/syslog.h>
41 #include <linux/cpu.h>
42 #include <linux/rculist.h>
43 #include <linux/poll.h>
44 #include <linux/irq_work.h>
45 #include <linux/ctype.h>
46 #include <linux/uio.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/debug.h>
49 #include <linux/sched/task_stack.h>
50 
51 #include <linux/uaccess.h>
52 #include <asm/sections.h>
53 
54 #include <trace/events/initcall.h>
55 #define CREATE_TRACE_POINTS
56 #include <trace/events/printk.h>
57 
58 #include "printk_ringbuffer.h"
59 #include "console_cmdline.h"
60 #include "braille.h"
61 #include "internal.h"
62 
63 int console_printk[4] = {
64 	CONSOLE_LOGLEVEL_DEFAULT,	/* console_loglevel */
65 	MESSAGE_LOGLEVEL_DEFAULT,	/* default_message_loglevel */
66 	CONSOLE_LOGLEVEL_MIN,		/* minimum_console_loglevel */
67 	CONSOLE_LOGLEVEL_DEFAULT,	/* default_console_loglevel */
68 };
69 EXPORT_SYMBOL_GPL(console_printk);
70 
71 atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0);
72 EXPORT_SYMBOL(ignore_console_lock_warning);
73 
74 /*
75  * Low level drivers may need this to know whether they can schedule in
76  * their unblank() callback or not. So let's export it.
77  */
78 int oops_in_progress;
79 EXPORT_SYMBOL(oops_in_progress);
80 
81 /*
82  * console_sem protects the console_drivers list, and also
83  * provides serialisation for access to the entire console
84  * driver system.
85  */
86 static DEFINE_SEMAPHORE(console_sem);
87 struct console *console_drivers;
88 EXPORT_SYMBOL_GPL(console_drivers);
89 
90 /*
91  * The system may need to suppress printk messages under certain
92  * circumstances, e.g. after a kernel panic.
93  */
94 int __read_mostly suppress_printk;
95 
96 /*
97  * During panic, heavy printk by other CPUs can delay the
98  * panic and risk deadlock on console resources.
99  */
100 static int __read_mostly suppress_panic_printk;
101 
102 #ifdef CONFIG_LOCKDEP
103 static struct lockdep_map console_lock_dep_map = {
104 	.name = "console_lock"
105 };
106 #endif
107 
108 enum devkmsg_log_bits {
109 	__DEVKMSG_LOG_BIT_ON = 0,
110 	__DEVKMSG_LOG_BIT_OFF,
111 	__DEVKMSG_LOG_BIT_LOCK,
112 };
113 
114 enum devkmsg_log_masks {
115 	DEVKMSG_LOG_MASK_ON             = BIT(__DEVKMSG_LOG_BIT_ON),
116 	DEVKMSG_LOG_MASK_OFF            = BIT(__DEVKMSG_LOG_BIT_OFF),
117 	DEVKMSG_LOG_MASK_LOCK           = BIT(__DEVKMSG_LOG_BIT_LOCK),
118 };
119 
120 /* Keep both the 'on' and 'off' bits clear, i.e. ratelimit by default: */
121 #define DEVKMSG_LOG_MASK_DEFAULT	0
122 
123 static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;
124 
125 static int __control_devkmsg(char *str)
126 {
127 	size_t len;
128 
129 	if (!str)
130 		return -EINVAL;
131 
132 	len = str_has_prefix(str, "on");
133 	if (len) {
134 		devkmsg_log = DEVKMSG_LOG_MASK_ON;
135 		return len;
136 	}
137 
138 	len = str_has_prefix(str, "off");
139 	if (len) {
140 		devkmsg_log = DEVKMSG_LOG_MASK_OFF;
141 		return len;
142 	}
143 
144 	len = str_has_prefix(str, "ratelimit");
145 	if (len) {
146 		devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;
147 		return len;
148 	}
149 
150 	return -EINVAL;
151 }
152 
153 static int __init control_devkmsg(char *str)
154 {
155 	if (__control_devkmsg(str) < 0) {
156 		pr_warn("printk.devkmsg: bad option string '%s'\n", str);
157 		return 1;
158 	}
159 
160 	/*
161 	 * Set sysctl string accordingly:
162 	 */
163 	if (devkmsg_log == DEVKMSG_LOG_MASK_ON)
164 		strcpy(devkmsg_log_str, "on");
165 	else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF)
166 		strcpy(devkmsg_log_str, "off");
167 	/* else "ratelimit" which is set by default. */
168 
169 	/*
170 	 * Sysctl cannot change it anymore. The kernel command line setting of
171 	 * this parameter is to force the setting to be permanent throughout the
172 	 * runtime of the system. This is a precautionary measure against userspace
173 	 * trying to be a smarta** and attempting to change it up on us.
174 	 */
175 	devkmsg_log |= DEVKMSG_LOG_MASK_LOCK;
176 
177 	return 1;
178 }
179 __setup("printk.devkmsg=", control_devkmsg);
180 
181 char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit";
182 #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
183 int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
184 			      void *buffer, size_t *lenp, loff_t *ppos)
185 {
186 	char old_str[DEVKMSG_STR_MAX_SIZE];
187 	unsigned int old;
188 	int err;
189 
190 	if (write) {
191 		if (devkmsg_log & DEVKMSG_LOG_MASK_LOCK)
192 			return -EINVAL;
193 
194 		old = devkmsg_log;
195 		strncpy(old_str, devkmsg_log_str, DEVKMSG_STR_MAX_SIZE);
196 	}
197 
198 	err = proc_dostring(table, write, buffer, lenp, ppos);
199 	if (err)
200 		return err;
201 
202 	if (write) {
203 		err = __control_devkmsg(devkmsg_log_str);
204 
205 		/*
206 		 * Do not accept an unknown string OR a known string with
207 		 * trailing crap...
208 		 */
209 		if (err < 0 || (err + 1 != *lenp)) {
210 
211 			/* ... and restore old setting. */
212 			devkmsg_log = old;
213 			strncpy(devkmsg_log_str, old_str, DEVKMSG_STR_MAX_SIZE);
214 
215 			return -EINVAL;
216 		}
217 	}
218 
219 	return 0;
220 }
221 #endif /* CONFIG_PRINTK && CONFIG_SYSCTL */
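/*
 * Editor's example (not part of the original source): how the interface
 * above is typically exercised. The sysctl path and values are the usual
 * ones but are shown here only for illustration.
 *
 *   Kernel command line:   printk.devkmsg=on
 *     - forces unlimited writes to /dev/kmsg and sets
 *       DEVKMSG_LOG_MASK_LOCK, so the value can no longer be changed.
 *
 *   Runtime (no boot parameter used):
 *     echo ratelimit > /proc/sys/kernel/printk_devkmsg
 *     - accepted; devkmsg_sysctl_set_loglvl() updates devkmsg_log.
 *
 *   Runtime (after the boot parameter locked the value):
 *     the same write fails with -EINVAL because DEVKMSG_LOG_MASK_LOCK is set.
 */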
222 
223 /* Number of registered extended console drivers. */
224 static int nr_ext_console_drivers;
225 
226 /*
227  * Used to synchronize printing kthreads against direct printing via
228  * console_trylock/console_unlock.
229  *
230  * Values:
231  * -1 = console kthreads atomically blocked (via global trylock)
232  *  0 = no kthread printing, console not locked (via trylock)
233  * >0 = kthread(s) actively printing
234  *
235  * Note: For synchronizing against direct printing via
236  *       console_lock/console_unlock, see the @lock variable in
237  *       struct console.
238  */
239 static atomic_t console_kthreads_active = ATOMIC_INIT(0);
240 
241 #define console_kthreads_atomic_tryblock() \
242 	(atomic_cmpxchg(&console_kthreads_active, 0, -1) == 0)
243 #define console_kthreads_atomic_unblock() \
244 	atomic_cmpxchg(&console_kthreads_active, -1, 0)
245 #define console_kthreads_atomically_blocked() \
246 	(atomic_read(&console_kthreads_active) == -1)
247 
248 #define console_kthread_printing_tryenter() \
249 	atomic_inc_unless_negative(&console_kthreads_active)
250 #define console_kthread_printing_exit() \
251 	atomic_dec(&console_kthreads_active)
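#if 0
/*
 * Editor's sketch (not built, not part of the original source): intended
 * use of the counters above. A printer kthread brackets each record it
 * emits with tryenter/exit, while a context that needs the consoles
 * exclusively (e.g. panic) takes the atomic block. The function names
 * below are hypothetical.
 */
static void example_kthread_emit_one_record(struct console *con)
{
	if (!console_kthread_printing_tryenter())
		return;		/* kthreads are atomically blocked */

	/* ... format one ringbuffer record and call con->write() ... */

	console_kthread_printing_exit();
}

static bool example_block_all_kthreads(void)
{
	/* Succeeds only while no kthread is inside its printing section. */
	return console_kthreads_atomic_tryblock();
}
#endif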
252 
253 /* Block console kthreads to avoid processing new messages. */
254 bool block_console_kthreads;
255 
256 /*
257  * Helper macros to handle lockdep when locking/unlocking console_sem. We use
258  * macros instead of functions so that _RET_IP_ contains useful information.
259  */
260 #define down_console_sem() do { \
261 	down(&console_sem);\
262 	mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\
263 } while (0)
264 
265 static int __down_trylock_console_sem(unsigned long ip)
266 {
267 	int lock_failed;
268 	unsigned long flags;
269 
270 	/*
271 	 * Here and in __up_console_sem() we need to be in safe mode,
272 	 * because spindump/WARN/etc from under console ->lock will
273 	 * deadlock in printk()->down_trylock_console_sem() otherwise.
274 	 */
275 	printk_safe_enter_irqsave(flags);
276 	lock_failed = down_trylock(&console_sem);
277 	printk_safe_exit_irqrestore(flags);
278 
279 	if (lock_failed)
280 		return 1;
281 	mutex_acquire(&console_lock_dep_map, 0, 1, ip);
282 	return 0;
283 }
284 #define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_)
285 
286 static void __up_console_sem(unsigned long ip)
287 {
288 	unsigned long flags;
289 
290 	mutex_release(&console_lock_dep_map, ip);
291 
292 	printk_safe_enter_irqsave(flags);
293 	up(&console_sem);
294 	printk_safe_exit_irqrestore(flags);
295 }
296 #define up_console_sem() __up_console_sem(_RET_IP_)
297 
298 static bool panic_in_progress(void)
299 {
300 	return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
301 }
302 
303 /*
304  * Tracks whether kthread printers are all blocked. A value of true implies
305  * that the console is locked via console_lock() or the console is suspended.
306  * Writing to this variable requires holding @console_sem.
307  */
308 static bool console_kthreads_blocked;
309 
310 /*
311  * Block all kthread printers from a schedulable context.
312  *
313  * Requires holding @console_sem.
314  */
315 static void console_kthreads_block(void)
316 {
317 	struct console *con;
318 
319 	for_each_console(con) {
320 		mutex_lock(&con->lock);
321 		con->blocked = true;
322 		mutex_unlock(&con->lock);
323 	}
324 
325 	console_kthreads_blocked = true;
326 }
327 
328 /*
329  * Unblock all kthread printers from a schedulable context.
330  *
331  * Requires holding @console_sem.
332  */
333 static void console_kthreads_unblock(void)
334 {
335 	struct console *con;
336 
337 	for_each_console(con) {
338 		mutex_lock(&con->lock);
339 		con->blocked = false;
340 		mutex_unlock(&con->lock);
341 	}
342 
343 	console_kthreads_blocked = false;
344 }
345 
346 static int console_suspended;
347 
348 /*
349  *	Array of consoles built from command line options (console=)
350  */
351 
352 #define MAX_CMDLINECONSOLES 8
353 
354 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
355 
356 static int preferred_console = -1;
357 int console_set_on_cmdline;
358 EXPORT_SYMBOL(console_set_on_cmdline);
359 
360 /* Flag: console code may call schedule() */
361 static int console_may_schedule;
362 
363 enum con_msg_format_flags {
364 	MSG_FORMAT_DEFAULT	= 0,
365 	MSG_FORMAT_SYSLOG	= (1 << 0),
366 };
367 
368 static int console_msg_format = MSG_FORMAT_DEFAULT;
369 
370 /*
371  * The printk log buffer consists of a sequenced collection of records, each
372  * containing variable length message text. Every record also contains its
373  * own meta-data (@info).
374  *
375  * Every record meta-data carries the timestamp in microseconds, as well as
376  * the standard userspace syslog level and syslog facility. The usual kernel
377  * messages use LOG_KERN; userspace-injected messages always carry a matching
378  * syslog facility, by default LOG_USER. The origin of every message can be
379  * reliably determined that way.
380  *
381  * The human readable log message of a record is available in @text, the
382  * length of the message text in @text_len. The stored message is not
383  * terminated.
384  *
385  * Optionally, a record can carry a dictionary of properties (key/value
386  * pairs), to provide userspace with a machine-readable message context.
387  *
388  * Examples for well-defined, commonly used property names are:
389  *   DEVICE=b12:8               device identifier
390  *                                b12:8         block dev_t
391  *                                c127:3        char dev_t
392  *                                n8            netdev ifindex
393  *                                +sound:card0  subsystem:devname
394  *   SUBSYSTEM=pci              driver-core subsystem name
395  *
396  * Valid characters in property names are [a-zA-Z0-9.-_]. Property names
397  * and values are terminated by a '\0' character.
398  *
399  * Example of record values:
400  *   record.text_buf                = "it's a line" (unterminated)
401  *   record.info.seq                = 56
402  *   record.info.ts_nsec            = 36863
403  *   record.info.text_len           = 11
404  *   record.info.facility           = 0 (LOG_KERN)
405  *   record.info.flags              = 0
406  *   record.info.level              = 3 (LOG_ERR)
407  *   record.info.caller_id          = 299 (task 299)
408  *   record.info.dev_info.subsystem = "pci" (terminated)
409  *   record.info.dev_info.device    = "+pci:0000:00:01.0" (terminated)
410  *
411  * The 'struct printk_info' buffer must never be directly exported to
412  * userspace, it is a kernel-private implementation detail that might
413  * need to be changed in the future, when the requirements change.
414  *
415  * /dev/kmsg exports the structured data in the following line format:
416  *   "<level>,<sequnum>,<timestamp>,<contflag>[,additional_values, ... ];<message text>\n"
417  *
418  * Users of the export format should ignore possible additional values
419  * separated by ',', and find the message after the ';' character.
420  *
421  * The optional key/value pairs are attached as continuation lines starting
422  * with a space character and terminated by a newline. All possible
423  * non-printable characters are escaped in the "\xff" notation.
424  */
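/*
 * Editor's example (not part of the original source): the example record
 * above would be emitted by /dev/kmsg roughly as shown below. The
 * timestamp is rounded down to microseconds (36863 ns -> 36), the first
 * field is (facility << 3) | level = 3, and the ",caller=T299" part only
 * appears when CONFIG_PRINTK_CALLER is enabled. Continuation lines for the
 * dictionary start with a space:
 *
 *   3,56,36,-,caller=T299;it's a line
 *    SUBSYSTEM=pci
 *    DEVICE=+pci:0000:00:01.0
 */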
425 
426 /* syslog_lock protects syslog_* variables and write access to clear_seq. */
427 static DEFINE_MUTEX(syslog_lock);
428 
429 /*
430  * A flag to signify if printk_activate_kthreads() has already started the
431  * kthread printers. If true, any later registered consoles must start their
432  * own kthread directly. The flag is write protected by the console_lock.
433  */
434 static bool printk_kthreads_available;
435 
436 #ifdef CONFIG_PRINTK
437 static atomic_t printk_prefer_direct = ATOMIC_INIT(0);
438 
439 /**
440  * printk_prefer_direct_enter - cause printk() calls to attempt direct
441  *                              printing to all enabled consoles
442  *
443  * Since it is not possible to call into the console printing code from
444  * every context, there is no guarantee that direct printing will occur.
445  *
446  * This globally affects all printk() callers.
447  *
448  * Context: Any context.
449  */
450 void printk_prefer_direct_enter(void)
451 {
452 	atomic_inc(&printk_prefer_direct);
453 }
454 
455 /**
456  * printk_prefer_direct_exit - restore printk() behavior
457  *
458  * Context: Any context.
459  */
460 void printk_prefer_direct_exit(void)
461 {
462 	WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0);
463 }
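#if 0
/*
 * Editor's sketch (not built, not part of the original source): typical
 * nesting of the prefer-direct counter around a path that must get its
 * messages out even when the printing kthreads cannot run. The caller is
 * hypothetical.
 */
static void example_emergency_report(void)
{
	printk_prefer_direct_enter();
	pr_emerg("example: reporting a critical condition\n");
	printk_prefer_direct_exit();
}
#endif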
464 
465 /*
466  * Calling printk() always wakes kthread printers so that they can
467  * flush the new message to their respective consoles. Also, if direct
468  * printing is allowed, printk() tries to flush the messages directly.
469  *
470  * Direct printing is allowed in situations when the kthreads
471  * are not available or the system is in a problematic state.
472  *
473  * See the implementation for details about possible races.
474  */
475 static inline bool allow_direct_printing(void)
476 {
477 	/*
478 	 * Checking kthread availability is a possible race because the
479 	 * kthread printers can become permanently disabled during runtime.
480 	 * However, doing that requires holding the console_lock, so any
481 	 * pending messages will be direct printed by console_unlock().
482 	 */
483 	if (!printk_kthreads_available)
484 		return true;
485 
486 	/*
487 	 * Prefer direct printing when the system is in a problematic state.
488 	 * The context that sets this state will always see the updated value.
489 	 * The other contexts do not care. Anyway, direct printing is just a
490 	 * best effort. The direct output is only possible when console_lock
491 	 * is not already taken and no kthread printers are actively printing.
492 	 */
493 	return (system_state > SYSTEM_RUNNING ||
494 		oops_in_progress ||
495 		atomic_read(&printk_prefer_direct));
496 }
497 
498 DECLARE_WAIT_QUEUE_HEAD(log_wait);
499 /* All 3 protected by @syslog_lock. */
500 /* the next printk record to read by syslog(READ) or /proc/kmsg */
501 static u64 syslog_seq;
502 static size_t syslog_partial;
503 static bool syslog_time;
504 
505 struct latched_seq {
506 	seqcount_latch_t	latch;
507 	u64			val[2];
508 };
509 
510 /*
511  * The next printk record to read after the last 'clear' command. There are
512  * two copies (updated with seqcount_latch) so that reads can locklessly
513  * access a valid value. Writers are synchronized by @syslog_lock.
514  */
515 static struct latched_seq clear_seq = {
516 	.latch		= SEQCNT_LATCH_ZERO(clear_seq.latch),
517 	.val[0]		= 0,
518 	.val[1]		= 0,
519 };
520 
521 #ifdef CONFIG_PRINTK_CALLER
522 #define PREFIX_MAX		48
523 #else
524 #define PREFIX_MAX		32
525 #endif
526 
527 /* the maximum size of a formatted record (i.e. with prefix added per line) */
528 #define CONSOLE_LOG_MAX		1024
529 
530 /* the maximum size for a dropped text message */
531 #define DROPPED_TEXT_MAX	64
532 
533 /* the maximum size allowed to be reserved for a record */
534 #define LOG_LINE_MAX		(CONSOLE_LOG_MAX - PREFIX_MAX)
535 
536 #define LOG_LEVEL(v)		((v) & 0x07)
537 #define LOG_FACILITY(v)		((v) >> 3 & 0xff)
538 
539 /* record buffer */
540 #define LOG_ALIGN __alignof__(unsigned long)
541 #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
542 #define LOG_BUF_LEN_MAX (u32)(1 << 31)
543 static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
544 static char *log_buf = __log_buf;
545 static u32 log_buf_len = __LOG_BUF_LEN;
546 
547 /*
548  * Define the average message size. This only affects the number of
549  * descriptors that will be available. Underestimating is better than
550  * overestimating (too many available descriptors is better than not enough).
551  */
552 #define PRB_AVGBITS 5	/* 32 character average length */
553 
554 #if CONFIG_LOG_BUF_SHIFT <= PRB_AVGBITS
555 #error CONFIG_LOG_BUF_SHIFT value too small.
556 #endif
557 _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS,
558 		 PRB_AVGBITS, &__log_buf[0]);
559 
560 static struct printk_ringbuffer printk_rb_dynamic;
561 
562 static struct printk_ringbuffer *prb = &printk_rb_static;
563 
564 /*
565  * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before
566  * per_cpu_areas are initialised. This variable is set to true when
567  * it's safe to access per-CPU data.
568  */
569 static bool __printk_percpu_data_ready __read_mostly;
570 
571 bool printk_percpu_data_ready(void)
572 {
573 	return __printk_percpu_data_ready;
574 }
575 
576 /* Must be called under syslog_lock. */
577 static void latched_seq_write(struct latched_seq *ls, u64 val)
578 {
579 	raw_write_seqcount_latch(&ls->latch);
580 	ls->val[0] = val;
581 	raw_write_seqcount_latch(&ls->latch);
582 	ls->val[1] = val;
583 }
584 
585 /* Can be called from any context. */
586 static u64 latched_seq_read_nolock(struct latched_seq *ls)
587 {
588 	unsigned int seq;
589 	unsigned int idx;
590 	u64 val;
591 
592 	do {
593 		seq = raw_read_seqcount_latch(&ls->latch);
594 		idx = seq & 0x1;
595 		val = ls->val[idx];
596 	} while (read_seqcount_latch_retry(&ls->latch, seq));
597 
598 	return val;
599 }
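#if 0
/*
 * Editor's sketch (not built, not part of the original source): the
 * latched-sequence pattern as used for @clear_seq. Writers serialize on
 * @syslog_lock; readers may run from any context without taking a lock.
 */
static void example_update_clear_seq(u64 new_seq)
{
	mutex_lock(&syslog_lock);
	latched_seq_write(&clear_seq, new_seq);
	mutex_unlock(&syslog_lock);
}

static u64 example_read_clear_seq(void)
{
	return latched_seq_read_nolock(&clear_seq);	/* lockless */
}
#endif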
600 
601 /* Return log buffer address */
602 char *log_buf_addr_get(void)
603 {
604 	return log_buf;
605 }
606 
607 /* Return log buffer size */
608 u32 log_buf_len_get(void)
609 {
610 	return log_buf_len;
611 }
612 
613 /*
614  * Define how much of the log buffer we could take at maximum. The value
615  * must be greater than two. Note that only half of the buffer is available
616  * when the index points to the middle.
617  */
618 #define MAX_LOG_TAKE_PART 4
619 static const char trunc_msg[] = "<truncated>";
620 
621 static void truncate_msg(u16 *text_len, u16 *trunc_msg_len)
622 {
623 	/*
624 	 * The message should not take the whole buffer. Otherwise, it might
625 	 * get removed too soon.
626 	 */
627 	u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART;
628 
629 	if (*text_len > max_text_len)
630 		*text_len = max_text_len;
631 
632 	/* enable the warning message (if there is room) */
633 	*trunc_msg_len = strlen(trunc_msg);
634 	if (*text_len >= *trunc_msg_len)
635 		*text_len -= *trunc_msg_len;
636 	else
637 		*trunc_msg_len = 0;
638 }
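/*
 * Editor's example (not part of the original source), assuming the common
 * default CONFIG_LOG_BUF_SHIFT=17 (128 KiB buffer): max_text_len is
 * 128 KiB / 4 = 32 KiB. A 40 KiB message is first clipped to 32 KiB and
 * then shortened by strlen("<truncated>") so the marker still fits, and a
 * non-zero *trunc_msg_len is returned.
 */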
639 
640 int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT);
641 
642 static int syslog_action_restricted(int type)
643 {
644 	if (dmesg_restrict)
645 		return 1;
646 	/*
647 	 * Unless restricted, we allow "read all" and "get buffer size"
648 	 * for everybody.
649 	 */
650 	return type != SYSLOG_ACTION_READ_ALL &&
651 	       type != SYSLOG_ACTION_SIZE_BUFFER;
652 }
653 
654 static int check_syslog_permissions(int type, int source)
655 {
656 	/*
657 	 * If this is from /proc/kmsg and we've already opened it, then we've
658 	 * already done the capabilities checks at open time.
659 	 */
660 	if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN)
661 		goto ok;
662 
663 	if (syslog_action_restricted(type)) {
664 		if (capable(CAP_SYSLOG))
665 			goto ok;
666 		/*
667 		 * For historical reasons, accept CAP_SYS_ADMIN too, with
668 		 * a warning.
669 		 */
670 		if (capable(CAP_SYS_ADMIN)) {
671 			pr_warn_once("%s (%d): Attempt to access syslog with "
672 				     "CAP_SYS_ADMIN but no CAP_SYSLOG "
673 				     "(deprecated).\n",
674 				 current->comm, task_pid_nr(current));
675 			goto ok;
676 		}
677 		return -EPERM;
678 	}
679 ok:
680 	return security_syslog(type);
681 }
682 
683 static void append_char(char **pp, char *e, char c)
684 {
685 	if (*pp < e)
686 		*(*pp)++ = c;
687 }
688 
689 static ssize_t info_print_ext_header(char *buf, size_t size,
690 				     struct printk_info *info)
691 {
692 	u64 ts_usec = info->ts_nsec;
693 	char caller[20];
694 #ifdef CONFIG_PRINTK_CALLER
695 	u32 id = info->caller_id;
696 
697 	snprintf(caller, sizeof(caller), ",caller=%c%u",
698 		 id & 0x80000000 ? 'C' : 'T', id & ~0x80000000);
699 #else
700 	caller[0] = '\0';
701 #endif
702 
703 	do_div(ts_usec, 1000);
704 
705 	return scnprintf(buf, size, "%u,%llu,%llu,%c%s;",
706 			 (info->facility << 3) | info->level, info->seq,
707 			 ts_usec, info->flags & LOG_CONT ? 'c' : '-', caller);
708 }
709 
710 static ssize_t msg_add_ext_text(char *buf, size_t size,
711 				const char *text, size_t text_len,
712 				unsigned char endc)
713 {
714 	char *p = buf, *e = buf + size;
715 	size_t i;
716 
717 	/* escape non-printable characters */
718 	for (i = 0; i < text_len; i++) {
719 		unsigned char c = text[i];
720 
721 		if (c < ' ' || c >= 127 || c == '\\')
722 			p += scnprintf(p, e - p, "\\x%02x", c);
723 		else
724 			append_char(&p, e, c);
725 	}
726 	append_char(&p, e, endc);
727 
728 	return p - buf;
729 }
730 
731 static ssize_t msg_add_dict_text(char *buf, size_t size,
732 				 const char *key, const char *val)
733 {
734 	size_t val_len = strlen(val);
735 	ssize_t len;
736 
737 	if (!val_len)
738 		return 0;
739 
740 	len = msg_add_ext_text(buf, size, "", 0, ' ');	/* dict prefix */
741 	len += msg_add_ext_text(buf + len, size - len, key, strlen(key), '=');
742 	len += msg_add_ext_text(buf + len, size - len, val, val_len, '\n');
743 
744 	return len;
745 }
746 
747 static ssize_t msg_print_ext_body(char *buf, size_t size,
748 				  char *text, size_t text_len,
749 				  struct dev_printk_info *dev_info)
750 {
751 	ssize_t len;
752 
753 	len = msg_add_ext_text(buf, size, text, text_len, '\n');
754 
755 	if (!dev_info)
756 		goto out;
757 
758 	len += msg_add_dict_text(buf + len, size - len, "SUBSYSTEM",
759 				 dev_info->subsystem);
760 	len += msg_add_dict_text(buf + len, size - len, "DEVICE",
761 				 dev_info->device);
762 out:
763 	return len;
764 }
765 
766 /* /dev/kmsg - userspace message inject/listen interface */
767 struct devkmsg_user {
768 	atomic64_t seq;
769 	struct ratelimit_state rs;
770 	struct mutex lock;
771 	char buf[CONSOLE_EXT_LOG_MAX];
772 
773 	struct printk_info info;
774 	char text_buf[CONSOLE_EXT_LOG_MAX];
775 	struct printk_record record;
776 };
777 
778 static __printf(3, 4) __cold
779 int devkmsg_emit(int facility, int level, const char *fmt, ...)
780 {
781 	va_list args;
782 	int r;
783 
784 	va_start(args, fmt);
785 	r = vprintk_emit(facility, level, NULL, fmt, args);
786 	va_end(args);
787 
788 	return r;
789 }
790 
791 static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
792 {
793 	char *buf, *line;
794 	int level = default_message_loglevel;
795 	int facility = 1;	/* LOG_USER */
796 	struct file *file = iocb->ki_filp;
797 	struct devkmsg_user *user = file->private_data;
798 	size_t len = iov_iter_count(from);
799 	ssize_t ret = len;
800 
801 	if (!user || len > LOG_LINE_MAX)
802 		return -EINVAL;
803 
804 	/* Ignore when user logging is disabled. */
805 	if (devkmsg_log & DEVKMSG_LOG_MASK_OFF)
806 		return len;
807 
808 	/* Ratelimit when not explicitly enabled. */
809 	if (!(devkmsg_log & DEVKMSG_LOG_MASK_ON)) {
810 		if (!___ratelimit(&user->rs, current->comm))
811 			return ret;
812 	}
813 
814 	buf = kmalloc(len+1, GFP_KERNEL);
815 	if (buf == NULL)
816 		return -ENOMEM;
817 
818 	buf[len] = '\0';
819 	if (!copy_from_iter_full(buf, len, from)) {
820 		kfree(buf);
821 		return -EFAULT;
822 	}
823 
824 	/*
825 	 * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace
826 	 * the decimal value represents 32bit, the lower 3 bit are the log
827 	 * level, the rest are the log facility.
828 	 *
829 	 * If no prefix or no userspace facility is specified, we
830 	 * enforce LOG_USER, to be able to reliably distinguish
831 	 * kernel-generated messages from userspace-injected ones.
832 	 */
833 	line = buf;
834 	if (line[0] == '<') {
835 		char *endp = NULL;
836 		unsigned int u;
837 
838 		u = simple_strtoul(line + 1, &endp, 10);
839 		if (endp && endp[0] == '>') {
840 			level = LOG_LEVEL(u);
841 			if (LOG_FACILITY(u) != 0)
842 				facility = LOG_FACILITY(u);
843 			endp++;
844 			line = endp;
845 		}
846 	}
847 
848 	devkmsg_emit(facility, level, "%s", line);
849 	kfree(buf);
850 	return ret;
851 }
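/*
 * Editor's example (not part of the original source): a userspace write of
 * "<12>example daemon message\n" to /dev/kmsg is parsed as u = 12, so
 * level = 12 & 7 = 4 (warning) and facility = 12 >> 3 = 1 (LOG_USER).
 * A write without a "<N>" prefix keeps default_message_loglevel and is
 * forced to the LOG_USER facility.
 */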
852 
853 static ssize_t devkmsg_read(struct file *file, char __user *buf,
854 			    size_t count, loff_t *ppos)
855 {
856 	struct devkmsg_user *user = file->private_data;
857 	struct printk_record *r = &user->record;
858 	size_t len;
859 	ssize_t ret;
860 
861 	if (!user)
862 		return -EBADF;
863 
864 	ret = mutex_lock_interruptible(&user->lock);
865 	if (ret)
866 		return ret;
867 
868 	if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) {
869 		if (file->f_flags & O_NONBLOCK) {
870 			ret = -EAGAIN;
871 			goto out;
872 		}
873 
874 		/*
875 		 * Guarantee this task is visible on the waitqueue before
876 		 * checking the wake condition.
877 		 *
878 		 * The full memory barrier within set_current_state() of
879 		 * prepare_to_wait_event() pairs with the full memory barrier
880 		 * within wq_has_sleeper().
881 		 *
882 		 * This pairs with __wake_up_klogd:A.
883 		 */
884 		ret = wait_event_interruptible(log_wait,
885 				prb_read_valid(prb,
886 					atomic64_read(&user->seq), r)); /* LMM(devkmsg_read:A) */
887 		if (ret)
888 			goto out;
889 	}
890 
891 	if (r->info->seq != atomic64_read(&user->seq)) {
892 		/* our last seen message is gone, return error and reset */
893 		atomic64_set(&user->seq, r->info->seq);
894 		ret = -EPIPE;
895 		goto out;
896 	}
897 
898 	len = info_print_ext_header(user->buf, sizeof(user->buf), r->info);
899 	len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len,
900 				  &r->text_buf[0], r->info->text_len,
901 				  &r->info->dev_info);
902 
903 	atomic64_set(&user->seq, r->info->seq + 1);
904 
905 	if (len > count) {
906 		ret = -EINVAL;
907 		goto out;
908 	}
909 
910 	if (copy_to_user(buf, user->buf, len)) {
911 		ret = -EFAULT;
912 		goto out;
913 	}
914 	ret = len;
915 out:
916 	mutex_unlock(&user->lock);
917 	return ret;
918 }
919 
920 /*
921  * Be careful when modifying this function!!!
922  *
923  * Only a few operations are supported because the device works only with
924  * entire variable-length messages (records). Non-standard values are
925  * returned in the other cases, and it has been this way for quite some time.
926  * User space applications might depend on this behavior.
927  */
928 static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
929 {
930 	struct devkmsg_user *user = file->private_data;
931 	loff_t ret = 0;
932 
933 	if (!user)
934 		return -EBADF;
935 	if (offset)
936 		return -ESPIPE;
937 
938 	switch (whence) {
939 	case SEEK_SET:
940 		/* the first record */
941 		atomic64_set(&user->seq, prb_first_valid_seq(prb));
942 		break;
943 	case SEEK_DATA:
944 		/*
945 		 * The first record after the last SYSLOG_ACTION_CLEAR,
946 		 * like issued by 'dmesg -c'. Reading /dev/kmsg itself
947 		 * changes no global state, and does not clear anything.
948 		 */
949 		atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq));
950 		break;
951 	case SEEK_END:
952 		/* after the last record */
953 		atomic64_set(&user->seq, prb_next_seq(prb));
954 		break;
955 	default:
956 		ret = -EINVAL;
957 	}
958 	return ret;
959 }
960 
961 static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
962 {
963 	struct devkmsg_user *user = file->private_data;
964 	struct printk_info info;
965 	__poll_t ret = 0;
966 
967 	if (!user)
968 		return EPOLLERR|EPOLLNVAL;
969 
970 	poll_wait(file, &log_wait, wait);
971 
972 	if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) {
973 		/* return error when data has vanished underneath us */
974 		if (info.seq != atomic64_read(&user->seq))
975 			ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
976 		else
977 			ret = EPOLLIN|EPOLLRDNORM;
978 	}
979 
980 	return ret;
981 }
982 
983 static int devkmsg_open(struct inode *inode, struct file *file)
984 {
985 	struct devkmsg_user *user;
986 	int err;
987 
988 	if (devkmsg_log & DEVKMSG_LOG_MASK_OFF)
989 		return -EPERM;
990 
991 	/* write-only does not need any file context */
992 	if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
993 		err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
994 					       SYSLOG_FROM_READER);
995 		if (err)
996 			return err;
997 	}
998 
999 	user = kvmalloc(sizeof(struct devkmsg_user), GFP_KERNEL);
1000 	if (!user)
1001 		return -ENOMEM;
1002 
1003 	ratelimit_default_init(&user->rs);
1004 	ratelimit_set_flags(&user->rs, RATELIMIT_MSG_ON_RELEASE);
1005 
1006 	mutex_init(&user->lock);
1007 
1008 	prb_rec_init_rd(&user->record, &user->info,
1009 			&user->text_buf[0], sizeof(user->text_buf));
1010 
1011 	atomic64_set(&user->seq, prb_first_valid_seq(prb));
1012 
1013 	file->private_data = user;
1014 	return 0;
1015 }
1016 
1017 static int devkmsg_release(struct inode *inode, struct file *file)
1018 {
1019 	struct devkmsg_user *user = file->private_data;
1020 
1021 	if (!user)
1022 		return 0;
1023 
1024 	ratelimit_state_exit(&user->rs);
1025 
1026 	mutex_destroy(&user->lock);
1027 	kvfree(user);
1028 	return 0;
1029 }
1030 
1031 const struct file_operations kmsg_fops = {
1032 	.open = devkmsg_open,
1033 	.read = devkmsg_read,
1034 	.write_iter = devkmsg_write,
1035 	.llseek = devkmsg_llseek,
1036 	.poll = devkmsg_poll,
1037 	.release = devkmsg_release,
1038 };
1039 
1040 #ifdef CONFIG_CRASH_CORE
1041 /*
1042  * This appends the listed symbols to /proc/vmcore
1043  *
1044  * /proc/vmcore is used by various utilities, like crash and makedumpfile to
1045  * obtain access to symbols that are otherwise very difficult to locate.  These
1046  * symbols are specifically used so that utilities can access and extract the
1047  * dmesg log from a vmcore file after a crash.
1048  */
1049 void log_buf_vmcoreinfo_setup(void)
1050 {
1051 	struct dev_printk_info *dev_info = NULL;
1052 
1053 	VMCOREINFO_SYMBOL(prb);
1054 	VMCOREINFO_SYMBOL(printk_rb_static);
1055 	VMCOREINFO_SYMBOL(clear_seq);
1056 
1057 	/*
1058 	 * Export struct size and field offsets. User space tools can
1059 	 * parse it and detect any changes to the structures down the line.
1060 	 */
1061 
1062 	VMCOREINFO_STRUCT_SIZE(printk_ringbuffer);
1063 	VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring);
1064 	VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring);
1065 	VMCOREINFO_OFFSET(printk_ringbuffer, fail);
1066 
1067 	VMCOREINFO_STRUCT_SIZE(prb_desc_ring);
1068 	VMCOREINFO_OFFSET(prb_desc_ring, count_bits);
1069 	VMCOREINFO_OFFSET(prb_desc_ring, descs);
1070 	VMCOREINFO_OFFSET(prb_desc_ring, infos);
1071 	VMCOREINFO_OFFSET(prb_desc_ring, head_id);
1072 	VMCOREINFO_OFFSET(prb_desc_ring, tail_id);
1073 
1074 	VMCOREINFO_STRUCT_SIZE(prb_desc);
1075 	VMCOREINFO_OFFSET(prb_desc, state_var);
1076 	VMCOREINFO_OFFSET(prb_desc, text_blk_lpos);
1077 
1078 	VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos);
1079 	VMCOREINFO_OFFSET(prb_data_blk_lpos, begin);
1080 	VMCOREINFO_OFFSET(prb_data_blk_lpos, next);
1081 
1082 	VMCOREINFO_STRUCT_SIZE(printk_info);
1083 	VMCOREINFO_OFFSET(printk_info, seq);
1084 	VMCOREINFO_OFFSET(printk_info, ts_nsec);
1085 	VMCOREINFO_OFFSET(printk_info, text_len);
1086 	VMCOREINFO_OFFSET(printk_info, caller_id);
1087 	VMCOREINFO_OFFSET(printk_info, dev_info);
1088 
1089 	VMCOREINFO_STRUCT_SIZE(dev_printk_info);
1090 	VMCOREINFO_OFFSET(dev_printk_info, subsystem);
1091 	VMCOREINFO_LENGTH(printk_info_subsystem, sizeof(dev_info->subsystem));
1092 	VMCOREINFO_OFFSET(dev_printk_info, device);
1093 	VMCOREINFO_LENGTH(printk_info_device, sizeof(dev_info->device));
1094 
1095 	VMCOREINFO_STRUCT_SIZE(prb_data_ring);
1096 	VMCOREINFO_OFFSET(prb_data_ring, size_bits);
1097 	VMCOREINFO_OFFSET(prb_data_ring, data);
1098 	VMCOREINFO_OFFSET(prb_data_ring, head_lpos);
1099 	VMCOREINFO_OFFSET(prb_data_ring, tail_lpos);
1100 
1101 	VMCOREINFO_SIZE(atomic_long_t);
1102 	VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter);
1103 
1104 	VMCOREINFO_STRUCT_SIZE(latched_seq);
1105 	VMCOREINFO_OFFSET(latched_seq, val);
1106 }
1107 #endif
1108 
1109 /* requested log_buf_len from kernel cmdline */
1110 static unsigned long __initdata new_log_buf_len;
1111 
1112 /* we practice scaling the ring buffer by powers of 2 */
1113 static void __init log_buf_len_update(u64 size)
1114 {
1115 	if (size > (u64)LOG_BUF_LEN_MAX) {
1116 		size = (u64)LOG_BUF_LEN_MAX;
1117 		pr_err("log_buf over 2G is not supported.\n");
1118 	}
1119 
1120 	if (size)
1121 		size = roundup_pow_of_two(size);
1122 	if (size > log_buf_len)
1123 		new_log_buf_len = (unsigned long)size;
1124 }
1125 
1126 /* save requested log_buf_len since it's too early to process it */
1127 static int __init log_buf_len_setup(char *str)
1128 {
1129 	u64 size;
1130 
1131 	if (!str)
1132 		return -EINVAL;
1133 
1134 	size = memparse(str, &str);
1135 
1136 	log_buf_len_update(size);
1137 
1138 	return 0;
1139 }
1140 early_param("log_buf_len", log_buf_len_setup);
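/*
 * Editor's example (not part of the original source): booting with
 * "log_buf_len=3M" requests 3145728 bytes; log_buf_len_update() rounds
 * this up to the next power of two, so setup_log_buf() later allocates a
 * 4 MiB dynamic buffer. A request that is not larger than the static
 * buffer (e.g. "log_buf_len=64K" with the 128 KiB default) is ignored.
 */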
1141 
1142 #ifdef CONFIG_SMP
1143 #define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT)
1144 
1145 static void __init log_buf_add_cpu(void)
1146 {
1147 	unsigned int cpu_extra;
1148 
1149 	/*
1150 	 * archs should set up cpu_possible_bits properly with
1151 	 * set_cpu_possible() after setup_arch(), but just in
1152 	 * case let's ensure this is valid.
1153 	 */
1154 	if (num_possible_cpus() == 1)
1155 		return;
1156 
1157 	cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN;
1158 
1159 	/* by default this will only continue through for large > 64 CPUs */
1160 	if (cpu_extra <= __LOG_BUF_LEN / 2)
1161 		return;
1162 
1163 	pr_info("log_buf_len individual max cpu contribution: %d bytes\n",
1164 		__LOG_CPU_MAX_BUF_LEN);
1165 	pr_info("log_buf_len total cpu_extra contributions: %d bytes\n",
1166 		cpu_extra);
1167 	pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN);
1168 
1169 	log_buf_len_update(cpu_extra + __LOG_BUF_LEN);
1170 }
1171 #else /* !CONFIG_SMP */
1172 static inline void log_buf_add_cpu(void) {}
1173 #endif /* CONFIG_SMP */
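/*
 * Editor's example (not part of the original source), assuming the common
 * defaults CONFIG_LOG_BUF_SHIFT=17 (128 KiB) and
 * CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 (4 KiB per CPU): with 128 possible CPUs,
 * cpu_extra = 127 * 4 KiB = 508 KiB, which exceeds half of the static
 * buffer (64 KiB), so log_buf_add_cpu() requests 508 KiB + 128 KiB =
 * 636 KiB, later rounded up to 1 MiB by log_buf_len_update().
 */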
1174 
1175 static void __init set_percpu_data_ready(void)
1176 {
1177 	__printk_percpu_data_ready = true;
1178 }
1179 
1180 static unsigned int __init add_to_rb(struct printk_ringbuffer *rb,
1181 				     struct printk_record *r)
1182 {
1183 	struct prb_reserved_entry e;
1184 	struct printk_record dest_r;
1185 
1186 	prb_rec_init_wr(&dest_r, r->info->text_len);
1187 
1188 	if (!prb_reserve(&e, rb, &dest_r))
1189 		return 0;
1190 
1191 	memcpy(&dest_r.text_buf[0], &r->text_buf[0], r->info->text_len);
1192 	dest_r.info->text_len = r->info->text_len;
1193 	dest_r.info->facility = r->info->facility;
1194 	dest_r.info->level = r->info->level;
1195 	dest_r.info->flags = r->info->flags;
1196 	dest_r.info->ts_nsec = r->info->ts_nsec;
1197 	dest_r.info->caller_id = r->info->caller_id;
1198 	memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info));
1199 
1200 	prb_final_commit(&e);
1201 
1202 	return prb_record_text_space(&e);
1203 }
1204 
1205 static char setup_text_buf[LOG_LINE_MAX] __initdata;
1206 
1207 void __init setup_log_buf(int early)
1208 {
1209 	struct printk_info *new_infos;
1210 	unsigned int new_descs_count;
1211 	struct prb_desc *new_descs;
1212 	struct printk_info info;
1213 	struct printk_record r;
1214 	unsigned int text_size;
1215 	size_t new_descs_size;
1216 	size_t new_infos_size;
1217 	unsigned long flags;
1218 	char *new_log_buf;
1219 	unsigned int free;
1220 	u64 seq;
1221 
1222 	/*
1223 	 * Some archs call setup_log_buf() multiple times - first is very
1224 	 * early, e.g. from setup_arch(), and second - when percpu_areas
1225 	 * are initialised.
1226 	 */
1227 	if (!early)
1228 		set_percpu_data_ready();
1229 
1230 	if (log_buf != __log_buf)
1231 		return;
1232 
1233 	if (!early && !new_log_buf_len)
1234 		log_buf_add_cpu();
1235 
1236 	if (!new_log_buf_len)
1237 		return;
1238 
1239 	new_descs_count = new_log_buf_len >> PRB_AVGBITS;
1240 	if (new_descs_count == 0) {
1241 		pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len);
1242 		return;
1243 	}
1244 
1245 	new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN);
1246 	if (unlikely(!new_log_buf)) {
1247 		pr_err("log_buf_len: %lu text bytes not available\n",
1248 		       new_log_buf_len);
1249 		return;
1250 	}
1251 
1252 	new_descs_size = new_descs_count * sizeof(struct prb_desc);
1253 	new_descs = memblock_alloc(new_descs_size, LOG_ALIGN);
1254 	if (unlikely(!new_descs)) {
1255 		pr_err("log_buf_len: %zu desc bytes not available\n",
1256 		       new_descs_size);
1257 		goto err_free_log_buf;
1258 	}
1259 
1260 	new_infos_size = new_descs_count * sizeof(struct printk_info);
1261 	new_infos = memblock_alloc(new_infos_size, LOG_ALIGN);
1262 	if (unlikely(!new_infos)) {
1263 		pr_err("log_buf_len: %zu info bytes not available\n",
1264 		       new_infos_size);
1265 		goto err_free_descs;
1266 	}
1267 
1268 	prb_rec_init_rd(&r, &info, &setup_text_buf[0], sizeof(setup_text_buf));
1269 
1270 	prb_init(&printk_rb_dynamic,
1271 		 new_log_buf, ilog2(new_log_buf_len),
1272 		 new_descs, ilog2(new_descs_count),
1273 		 new_infos);
1274 
1275 	local_irq_save(flags);
1276 
1277 	log_buf_len = new_log_buf_len;
1278 	log_buf = new_log_buf;
1279 	new_log_buf_len = 0;
1280 
1281 	free = __LOG_BUF_LEN;
1282 	prb_for_each_record(0, &printk_rb_static, seq, &r) {
1283 		text_size = add_to_rb(&printk_rb_dynamic, &r);
1284 		if (text_size > free)
1285 			free = 0;
1286 		else
1287 			free -= text_size;
1288 	}
1289 
1290 	prb = &printk_rb_dynamic;
1291 
1292 	local_irq_restore(flags);
1293 
1294 	/*
1295 	 * Copy any remaining messages that might have appeared from
1296 	 * NMI context after copying but before switching to the
1297 	 * dynamic buffer.
1298 	 */
1299 	prb_for_each_record(seq, &printk_rb_static, seq, &r) {
1300 		text_size = add_to_rb(&printk_rb_dynamic, &r);
1301 		if (text_size > free)
1302 			free = 0;
1303 		else
1304 			free -= text_size;
1305 	}
1306 
1307 	if (seq != prb_next_seq(&printk_rb_static)) {
1308 		pr_err("dropped %llu messages\n",
1309 		       prb_next_seq(&printk_rb_static) - seq);
1310 	}
1311 
1312 	pr_info("log_buf_len: %u bytes\n", log_buf_len);
1313 	pr_info("early log buf free: %u(%u%%)\n",
1314 		free, (free * 100) / __LOG_BUF_LEN);
1315 	return;
1316 
1317 err_free_descs:
1318 	memblock_free(new_descs, new_descs_size);
1319 err_free_log_buf:
1320 	memblock_free(new_log_buf, new_log_buf_len);
1321 }
1322 
1323 static bool __read_mostly ignore_loglevel;
1324 
1325 static int __init ignore_loglevel_setup(char *str)
1326 {
1327 	ignore_loglevel = true;
1328 	pr_info("debug: ignoring loglevel setting.\n");
1329 
1330 	return 0;
1331 }
1332 
1333 early_param("ignore_loglevel", ignore_loglevel_setup);
1334 module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
1335 MODULE_PARM_DESC(ignore_loglevel,
1336 		 "ignore loglevel setting (prints all kernel messages to the console)");
1337 
1338 static bool suppress_message_printing(int level)
1339 {
1340 	return (level >= console_loglevel && !ignore_loglevel);
1341 }
1342 
1343 #ifdef CONFIG_BOOT_PRINTK_DELAY
1344 
1345 static int boot_delay; /* msecs delay after each printk during bootup */
1346 static unsigned long long loops_per_msec;	/* based on boot_delay */
1347 
1348 static int __init boot_delay_setup(char *str)
1349 {
1350 	unsigned long lpj;
1351 
1352 	lpj = preset_lpj ? preset_lpj : 1000000;	/* some guess */
1353 	loops_per_msec = (unsigned long long)lpj / 1000 * HZ;
1354 
1355 	get_option(&str, &boot_delay);
1356 	if (boot_delay > 10 * 1000)
1357 		boot_delay = 0;
1358 
1359 	pr_debug("boot_delay: %u, preset_lpj: %ld, lpj: %lu, "
1360 		"HZ: %d, loops_per_msec: %llu\n",
1361 		boot_delay, preset_lpj, lpj, HZ, loops_per_msec);
1362 	return 0;
1363 }
1364 early_param("boot_delay", boot_delay_setup);
1365 
1366 static void boot_delay_msec(int level)
1367 {
1368 	unsigned long long k;
1369 	unsigned long timeout;
1370 
1371 	if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING)
1372 		|| suppress_message_printing(level)) {
1373 		return;
1374 	}
1375 
1376 	k = (unsigned long long)loops_per_msec * boot_delay;
1377 
1378 	timeout = jiffies + msecs_to_jiffies(boot_delay);
1379 	while (k) {
1380 		k--;
1381 		cpu_relax();
1382 		/*
1383 		 * use (volatile) jiffies to prevent
1384 		 * compiler reduction; loop termination via jiffies
1385 		 * is secondary and may or may not happen.
1386 		 */
1387 		if (time_after(jiffies, timeout))
1388 			break;
1389 		touch_nmi_watchdog();
1390 	}
1391 }
1392 #else
1393 static inline void boot_delay_msec(int level)
1394 {
1395 }
1396 #endif
1397 
1398 static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
1399 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
1400 
1401 static size_t print_syslog(unsigned int level, char *buf)
1402 {
1403 	return sprintf(buf, "<%u>", level);
1404 }
1405 
1406 static size_t print_time(u64 ts, char *buf)
1407 {
1408 	unsigned long rem_nsec = do_div(ts, 1000000000);
1409 
1410 	return sprintf(buf, "[%5lu.%06lu]",
1411 		       (unsigned long)ts, rem_nsec / 1000);
1412 }
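/*
 * Editor's example (not part of the original source): ts = 5123456789 ns
 * gives a do_div() quotient of 5 and a remainder of 123456789, so the
 * formatted prefix is "[    5.123456]" (seconds right-aligned to five
 * places, microseconds zero-padded to six).
 */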
1413 
1414 #ifdef CONFIG_PRINTK_CALLER
1415 static size_t print_caller(u32 id, char *buf)
1416 {
1417 	char caller[12];
1418 
1419 	snprintf(caller, sizeof(caller), "%c%u",
1420 		 id & 0x80000000 ? 'C' : 'T', id & ~0x80000000);
1421 	return sprintf(buf, "[%6s]", caller);
1422 }
1423 #else
1424 #define print_caller(id, buf) 0
1425 #endif
1426 
1427 static size_t info_print_prefix(const struct printk_info  *info, bool syslog,
1428 				bool time, char *buf)
1429 {
1430 	size_t len = 0;
1431 
1432 	if (syslog)
1433 		len = print_syslog((info->facility << 3) | info->level, buf);
1434 
1435 	if (time)
1436 		len += print_time(info->ts_nsec, buf + len);
1437 
1438 	len += print_caller(info->caller_id, buf + len);
1439 
1440 	if (IS_ENABLED(CONFIG_PRINTK_CALLER) || time) {
1441 		buf[len++] = ' ';
1442 		buf[len] = '\0';
1443 	}
1444 
1445 	return len;
1446 }
1447 
1448 /*
1449  * Prepare the record for printing. The text is shifted within the given
1450  * buffer to avoid a need for another one. The following operations are
1451  * done:
1452  *
1453  *   - Add prefix for each line.
1454  *   - Drop truncated lines that no longer fit into the buffer.
1455  *   - Add the trailing newline that has been removed in vprintk_store().
1456  *   - Add a string terminator.
1457  *
1458  * Since the produced string is always terminated, the maximum possible
1459  * return value is @r->text_buf_size - 1;
1460  *
1461  * Return: The length of the updated/prepared text, including the added
1462  * prefixes and the newline. The terminator is not counted. The dropped
1463  * line(s) are not counted.
1464  */
1465 static size_t record_print_text(struct printk_record *r, bool syslog,
1466 				bool time)
1467 {
1468 	size_t text_len = r->info->text_len;
1469 	size_t buf_size = r->text_buf_size;
1470 	char *text = r->text_buf;
1471 	char prefix[PREFIX_MAX];
1472 	bool truncated = false;
1473 	size_t prefix_len;
1474 	size_t line_len;
1475 	size_t len = 0;
1476 	char *next;
1477 
1478 	/*
1479 	 * If the message was truncated because the buffer was not large
1480 	 * enough, treat the available text as if it were the full text.
1481 	 */
1482 	if (text_len > buf_size)
1483 		text_len = buf_size;
1484 
1485 	prefix_len = info_print_prefix(r->info, syslog, time, prefix);
1486 
1487 	/*
1488 	 * @text_len: bytes of unprocessed text
1489 	 * @line_len: bytes of current line _without_ newline
1490 	 * @text:     pointer to beginning of current line
1491 	 * @len:      number of bytes prepared in r->text_buf
1492 	 */
1493 	for (;;) {
1494 		next = memchr(text, '\n', text_len);
1495 		if (next) {
1496 			line_len = next - text;
1497 		} else {
1498 			/* Drop truncated line(s). */
1499 			if (truncated)
1500 				break;
1501 			line_len = text_len;
1502 		}
1503 
1504 		/*
1505 		 * Truncate the text if there is not enough space to add the
1506 		 * prefix and a trailing newline and a terminator.
1507 		 */
1508 		if (len + prefix_len + text_len + 1 + 1 > buf_size) {
1509 			/* Drop even the current line if no space. */
1510 			if (len + prefix_len + line_len + 1 + 1 > buf_size)
1511 				break;
1512 
1513 			text_len = buf_size - len - prefix_len - 1 - 1;
1514 			truncated = true;
1515 		}
1516 
1517 		memmove(text + prefix_len, text, text_len);
1518 		memcpy(text, prefix, prefix_len);
1519 
1520 		/*
1521 		 * Increment the prepared length to include the text and
1522 		 * prefix that were just moved+copied. Also increment for the
1523 		 * newline at the end of this line. If this is the last line,
1524 		 * there is no newline, but it will be added immediately below.
1525 		 */
1526 		len += prefix_len + line_len + 1;
1527 		if (text_len == line_len) {
1528 			/*
1529 			 * This is the last line. Add the trailing newline
1530 			 * removed in vprintk_store().
1531 			 */
1532 			text[prefix_len + line_len] = '\n';
1533 			break;
1534 		}
1535 
1536 		/*
1537 		 * Advance beyond the added prefix and the related line with
1538 		 * its newline.
1539 		 */
1540 		text += prefix_len + line_len + 1;
1541 
1542 		/*
1543 		 * The remaining text has only decreased by the line with its
1544 		 * newline.
1545 		 *
1546 		 * Note that @text_len can become zero. It happens when @text
1547 		 * ended with a newline (either due to truncation or the
1548 		 * original string ending with "\n\n"). The loop is correctly
1549 		 * repeated and (if not truncated) an empty line with a prefix
1550 		 * will be prepared.
1551 		 */
1552 		text_len -= line_len + 1;
1553 	}
1554 
1555 	/*
1556 	 * If a buffer was provided, it will be terminated. Space for the
1557 	 * string terminator is guaranteed to be available. The terminator is
1558 	 * not counted in the return value.
1559 	 */
1560 	if (buf_size > 0)
1561 		r->text_buf[len] = 0;
1562 
1563 	return len;
1564 }
1565 
1566 static size_t get_record_print_text_size(struct printk_info *info,
1567 					 unsigned int line_count,
1568 					 bool syslog, bool time)
1569 {
1570 	char prefix[PREFIX_MAX];
1571 	size_t prefix_len;
1572 
1573 	prefix_len = info_print_prefix(info, syslog, time, prefix);
1574 
1575 	/*
1576 	 * Each line will be preceded with a prefix. The intermediate
1577 	 * newlines are already within the text, but a final trailing
1578 	 * newline will be added.
1579 	 */
1580 	return ((prefix_len * line_count) + info->text_len + 1);
1581 }
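/*
 * Editor's example (not part of the original source): a two-line record
 * "first\nsecond" has text_len = 12 and line_count = 2. With a 16-byte
 * prefix per line the estimate is 2 * 16 + 12 + 1 = 45 bytes: one prefix
 * per line, the text (its embedded newline is kept), and the trailing
 * newline that record_print_text() will add.
 */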
1582 
1583 /*
1584  * Beginning with @start_seq, find the first record where it and all following
1585  * records up to (but not including) @max_seq fit into @size.
1586  *
1587  * @max_seq is simply an upper bound and does not need to exist. If the caller
1588  * does not require an upper bound, -1 can be used for @max_seq.
1589  */
1590 static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
1591 				  bool syslog, bool time)
1592 {
1593 	struct printk_info info;
1594 	unsigned int line_count;
1595 	size_t len = 0;
1596 	u64 seq;
1597 
1598 	/* Determine the size of the records up to @max_seq. */
1599 	prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
1600 		if (info.seq >= max_seq)
1601 			break;
1602 		len += get_record_print_text_size(&info, line_count, syslog, time);
1603 	}
1604 
1605 	/*
1606 	 * Adjust the upper bound for the next loop to avoid subtracting
1607 	 * lengths that were never added.
1608 	 */
1609 	if (seq < max_seq)
1610 		max_seq = seq;
1611 
1612 	/*
1613 	 * Move first record forward until length fits into the buffer. Ignore
1614 	 * newest messages that were not counted in the above cycle. Messages
1615 	 * might appear and get lost in the meantime. This is a best effort
1616 	 * that prevents an infinite loop that could occur with a retry.
1617 	 */
1618 	prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
1619 		if (len <= size || info.seq >= max_seq)
1620 			break;
1621 		len -= get_record_print_text_size(&info, line_count, syslog, time);
1622 	}
1623 
1624 	return seq;
1625 }
1626 
1627 /* The caller is responsible for making sure @size is greater than 0. */
1628 static int syslog_print(char __user *buf, int size)
1629 {
1630 	struct printk_info info;
1631 	struct printk_record r;
1632 	char *text;
1633 	int len = 0;
1634 	u64 seq;
1635 
1636 	text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
1637 	if (!text)
1638 		return -ENOMEM;
1639 
1640 	prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
1641 
1642 	mutex_lock(&syslog_lock);
1643 
1644 	/*
1645 	 * Wait for the @syslog_seq record to be available. @syslog_seq may
1646 	 * change while waiting.
1647 	 */
1648 	do {
1649 		seq = syslog_seq;
1650 
1651 		mutex_unlock(&syslog_lock);
1652 		/*
1653 		 * Guarantee this task is visible on the waitqueue before
1654 		 * checking the wake condition.
1655 		 *
1656 		 * The full memory barrier within set_current_state() of
1657 		 * prepare_to_wait_event() pairs with the full memory barrier
1658 		 * within wq_has_sleeper().
1659 		 *
1660 		 * This pairs with __wake_up_klogd:A.
1661 		 */
1662 		len = wait_event_interruptible(log_wait,
1663 				prb_read_valid(prb, seq, NULL)); /* LMM(syslog_print:A) */
1664 		mutex_lock(&syslog_lock);
1665 
1666 		if (len)
1667 			goto out;
1668 	} while (syslog_seq != seq);
1669 
1670 	/*
1671 	 * Copy records that fit into the buffer. The above cycle makes sure
1672 	 * that the first record is always available.
1673 	 */
1674 	do {
1675 		size_t n;
1676 		size_t skip;
1677 		int err;
1678 
1679 		if (!prb_read_valid(prb, syslog_seq, &r))
1680 			break;
1681 
1682 		if (r.info->seq != syslog_seq) {
1683 			/* message is gone, move to next valid one */
1684 			syslog_seq = r.info->seq;
1685 			syslog_partial = 0;
1686 		}
1687 
1688 		/*
1689 		 * To keep reading/counting partial line consistent,
1690 		 * use printk_time value as of the beginning of a line.
1691 		 */
1692 		if (!syslog_partial)
1693 			syslog_time = printk_time;
1694 
1695 		skip = syslog_partial;
1696 		n = record_print_text(&r, true, syslog_time);
1697 		if (n - syslog_partial <= size) {
1698 			/* message fits into buffer, move forward */
1699 			syslog_seq = r.info->seq + 1;
1700 			n -= syslog_partial;
1701 			syslog_partial = 0;
1702 		} else if (!len) {
1703 			/* partial read(), remember position */
1704 			n = size;
1705 			syslog_partial += n;
1706 		} else
1707 			n = 0;
1708 
1709 		if (!n)
1710 			break;
1711 
1712 		mutex_unlock(&syslog_lock);
1713 		err = copy_to_user(buf, text + skip, n);
1714 		mutex_lock(&syslog_lock);
1715 
1716 		if (err) {
1717 			if (!len)
1718 				len = -EFAULT;
1719 			break;
1720 		}
1721 
1722 		len += n;
1723 		size -= n;
1724 		buf += n;
1725 	} while (size);
1726 out:
1727 	mutex_unlock(&syslog_lock);
1728 	kfree(text);
1729 	return len;
1730 }
1731 
1732 static int syslog_print_all(char __user *buf, int size, bool clear)
1733 {
1734 	struct printk_info info;
1735 	struct printk_record r;
1736 	char *text;
1737 	int len = 0;
1738 	u64 seq;
1739 	bool time;
1740 
1741 	text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
1742 	if (!text)
1743 		return -ENOMEM;
1744 
1745 	time = printk_time;
1746 	/*
1747 	 * Find first record that fits, including all following records,
1748 	 * into the user-provided buffer for this dump.
1749 	 */
1750 	seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1,
1751 				     size, true, time);
1752 
1753 	prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
1754 
1755 	len = 0;
1756 	prb_for_each_record(seq, prb, seq, &r) {
1757 		int textlen;
1758 
1759 		textlen = record_print_text(&r, true, time);
1760 
1761 		if (len + textlen > size) {
1762 			seq--;
1763 			break;
1764 		}
1765 
1766 		if (copy_to_user(buf + len, text, textlen))
1767 			len = -EFAULT;
1768 		else
1769 			len += textlen;
1770 
1771 		if (len < 0)
1772 			break;
1773 	}
1774 
1775 	if (clear) {
1776 		mutex_lock(&syslog_lock);
1777 		latched_seq_write(&clear_seq, seq);
1778 		mutex_unlock(&syslog_lock);
1779 	}
1780 
1781 	kfree(text);
1782 	return len;
1783 }
1784 
1785 static void syslog_clear(void)
1786 {
1787 	mutex_lock(&syslog_lock);
1788 	latched_seq_write(&clear_seq, prb_next_seq(prb));
1789 	mutex_unlock(&syslog_lock);
1790 }
1791 
1792 int do_syslog(int type, char __user *buf, int len, int source)
1793 {
1794 	struct printk_info info;
1795 	bool clear = false;
1796 	static int saved_console_loglevel = LOGLEVEL_DEFAULT;
1797 	int error;
1798 
1799 	error = check_syslog_permissions(type, source);
1800 	if (error)
1801 		return error;
1802 
1803 	switch (type) {
1804 	case SYSLOG_ACTION_CLOSE:	/* Close log */
1805 		break;
1806 	case SYSLOG_ACTION_OPEN:	/* Open log */
1807 		break;
1808 	case SYSLOG_ACTION_READ:	/* Read from log */
1809 		if (!buf || len < 0)
1810 			return -EINVAL;
1811 		if (!len)
1812 			return 0;
1813 		if (!access_ok(buf, len))
1814 			return -EFAULT;
1815 		error = syslog_print(buf, len);
1816 		break;
1817 	/* Read/clear last kernel messages */
1818 	case SYSLOG_ACTION_READ_CLEAR:
1819 		clear = true;
1820 		fallthrough;
1821 	/* Read last kernel messages */
1822 	case SYSLOG_ACTION_READ_ALL:
1823 		if (!buf || len < 0)
1824 			return -EINVAL;
1825 		if (!len)
1826 			return 0;
1827 		if (!access_ok(buf, len))
1828 			return -EFAULT;
1829 		error = syslog_print_all(buf, len, clear);
1830 		break;
1831 	/* Clear ring buffer */
1832 	case SYSLOG_ACTION_CLEAR:
1833 		syslog_clear();
1834 		break;
1835 	/* Disable logging to console */
1836 	case SYSLOG_ACTION_CONSOLE_OFF:
1837 		if (saved_console_loglevel == LOGLEVEL_DEFAULT)
1838 			saved_console_loglevel = console_loglevel;
1839 		console_loglevel = minimum_console_loglevel;
1840 		break;
1841 	/* Enable logging to console */
1842 	case SYSLOG_ACTION_CONSOLE_ON:
1843 		if (saved_console_loglevel != LOGLEVEL_DEFAULT) {
1844 			console_loglevel = saved_console_loglevel;
1845 			saved_console_loglevel = LOGLEVEL_DEFAULT;
1846 		}
1847 		break;
1848 	/* Set level of messages printed to console */
1849 	case SYSLOG_ACTION_CONSOLE_LEVEL:
1850 		if (len < 1 || len > 8)
1851 			return -EINVAL;
1852 		if (len < minimum_console_loglevel)
1853 			len = minimum_console_loglevel;
1854 		console_loglevel = len;
1855 		/* Implicitly re-enable logging to console */
1856 		saved_console_loglevel = LOGLEVEL_DEFAULT;
1857 		break;
1858 	/* Number of chars in the log buffer */
1859 	case SYSLOG_ACTION_SIZE_UNREAD:
1860 		mutex_lock(&syslog_lock);
1861 		if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) {
1862 			/* No unread messages. */
1863 			mutex_unlock(&syslog_lock);
1864 			return 0;
1865 		}
1866 		if (info.seq != syslog_seq) {
1867 			/* messages are gone, move to first one */
1868 			syslog_seq = info.seq;
1869 			syslog_partial = 0;
1870 		}
1871 		if (source == SYSLOG_FROM_PROC) {
1872 			/*
1873 			 * Short-cut for poll(/proc/kmsg) which simply checks
1874 			 * for pending data, not the size; return the count of
1875 			 * records, not the length.
1876 			 */
1877 			error = prb_next_seq(prb) - syslog_seq;
1878 		} else {
1879 			bool time = syslog_partial ? syslog_time : printk_time;
1880 			unsigned int line_count;
1881 			u64 seq;
1882 
1883 			prb_for_each_info(syslog_seq, prb, seq, &info,
1884 					  &line_count) {
1885 				error += get_record_print_text_size(&info, line_count,
1886 								    true, time);
1887 				time = printk_time;
1888 			}
1889 			error -= syslog_partial;
1890 		}
1891 		mutex_unlock(&syslog_lock);
1892 		break;
1893 	/* Size of the log buffer */
1894 	case SYSLOG_ACTION_SIZE_BUFFER:
1895 		error = log_buf_len;
1896 		break;
1897 	default:
1898 		error = -EINVAL;
1899 		break;
1900 	}
1901 
1902 	return error;
1903 }
1904 
1905 SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
1906 {
1907 	return do_syslog(type, buf, len, SYSLOG_FROM_READER);
1908 }
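
/*
 * Illustrative sketch (not part of the kernel build): how the syslog(2)
 * actions handled by do_syslog() above are typically driven from user
 * space via glibc's klogctl() wrapper. Error handling is omitted.
 *
 *	#include <stdlib.h>
 *	#include <sys/klog.h>
 *
 *	int dump_kernel_log(void)
 *	{
 *		int len = klogctl(10, NULL, 0);	// SYSLOG_ACTION_SIZE_BUFFER
 *		char *buf = malloc(len);
 *
 *		if (!buf)
 *			return -1;
 *		len = klogctl(3, buf, len);	// SYSLOG_ACTION_READ_ALL
 *		// buf now holds up to len bytes of formatted log text
 *		free(buf);
 *		return len;
 *	}
 */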
1909 
1910 /*
1911  * Special console_lock variants that help to reduce the risk of soft-lockups.
1912  * They allow console_lock to be passed to another printk() call using a busy wait.
1913  */
1914 
1915 #ifdef CONFIG_LOCKDEP
1916 static struct lockdep_map console_owner_dep_map = {
1917 	.name = "console_owner"
1918 };
1919 #endif
1920 
1921 static DEFINE_RAW_SPINLOCK(console_owner_lock);
1922 static struct task_struct *console_owner;
1923 static bool console_waiter;
1924 
1925 /**
1926  * console_lock_spinning_enable - mark beginning of code where another
1927  *	thread might safely busy wait
1928  *
1929  * This basically converts console_lock into a spinlock. This marks
1930  * the section where the console_lock owner cannot sleep, because
1931  * there may be a waiter spinning (like a spinlock). It must also be
1932  * ready to hand over the lock at the end of the section.
1933  */
1934 static void console_lock_spinning_enable(void)
1935 {
1936 	raw_spin_lock(&console_owner_lock);
1937 	console_owner = current;
1938 	raw_spin_unlock(&console_owner_lock);
1939 
1940 	/* The waiter may spin on us after setting console_owner */
1941 	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
1942 }
1943 
1944 /**
1945  * console_lock_spinning_disable_and_check - mark end of code where another
1946  *	thread was able to busy wait and check if there is a waiter
1947  *
1948  * This is called at the end of the section where spinning is allowed.
1949  * It serves two purposes. First, it signals that it is no longer
1950  * safe to start busy waiting for the lock. Second, it checks if
1951  * there is a busy waiter and, if so, passes the lock rights to that waiter.
1952  *
1953  * Important: Callers lose the lock if there was a busy waiter.
1954  *	They must not touch items synchronized by console_lock
1955  *	in this case.
1956  *
1957  * Return: 1 if the lock rights were passed, 0 otherwise.
1958  */
1959 static int console_lock_spinning_disable_and_check(void)
1960 {
1961 	int waiter;
1962 
1963 	raw_spin_lock(&console_owner_lock);
1964 	waiter = READ_ONCE(console_waiter);
1965 	console_owner = NULL;
1966 	raw_spin_unlock(&console_owner_lock);
1967 
1968 	if (!waiter) {
1969 		spin_release(&console_owner_dep_map, _THIS_IP_);
1970 		return 0;
1971 	}
1972 
1973 	/* The waiter is now free to continue */
1974 	WRITE_ONCE(console_waiter, false);
1975 
1976 	spin_release(&console_owner_dep_map, _THIS_IP_);
1977 
1978 	/*
1979 	 * Hand off console_lock to waiter. The waiter will perform
1980 	 * the up(). After this, the waiter is the console_lock owner.
1981 	 */
1982 	mutex_release(&console_lock_dep_map, _THIS_IP_);
1983 	return 1;
1984 }
1985 
1986 /**
1987  * console_trylock_spinning - try to get console_lock by busy waiting
1988  *
1989  * This allows busy waiting for the console_lock when the current
1990  * owner is running in specially marked sections. It means that
1991  * the current owner is running and cannot reschedule until it
1992  * is ready to lose the lock.
1993  *
1994  * Return: 1 if we got the lock, 0 otherwise
1995  */
1996 static int console_trylock_spinning(void)
1997 {
1998 	struct task_struct *owner = NULL;
1999 	bool waiter;
2000 	bool spin = false;
2001 	unsigned long flags;
2002 
2003 	if (console_trylock())
2004 		return 1;
2005 
2006 	/*
2007 	 * It's unsafe to spin once a panic has begun. If we are the
2008 	 * panic CPU, we may have already halted the owner of the
2009 	 * console_sem. If we are not the panic CPU, then we should
2010 	 * avoid taking console_sem, so the panic CPU has a better
2011 	 * chance of cleanly acquiring it later.
2012 	 */
2013 	if (panic_in_progress())
2014 		return 0;
2015 
2016 	printk_safe_enter_irqsave(flags);
2017 
2018 	raw_spin_lock(&console_owner_lock);
2019 	owner = READ_ONCE(console_owner);
2020 	waiter = READ_ONCE(console_waiter);
2021 	if (!waiter && owner && owner != current) {
2022 		WRITE_ONCE(console_waiter, true);
2023 		spin = true;
2024 	}
2025 	raw_spin_unlock(&console_owner_lock);
2026 
2027 	/*
2028 	 * If there is an active printk() writing to the
2029 	 * consoles, instead of having it write our data too,
2030 	 * see if we can offload that load from the active
2031 	 * printer, and do some printing ourselves.
2032 	 * Go into a spin only if there isn't already a waiter
2033 	 * spinning, and there is an active printer, and
2034 	 * that active printer isn't us (recursive printk?).
2035 	 */
2036 	if (!spin) {
2037 		printk_safe_exit_irqrestore(flags);
2038 		return 0;
2039 	}
2040 
2041 	/* We spin waiting for the owner to release us */
2042 	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
2043 	/* Owner will clear console_waiter on hand off */
2044 	while (READ_ONCE(console_waiter))
2045 		cpu_relax();
2046 	spin_release(&console_owner_dep_map, _THIS_IP_);
2047 
2048 	printk_safe_exit_irqrestore(flags);
2049 	/*
2050 	 * The owner passed the console lock to us.
2051 	 * Since we did not spin on console lock, annotate
2052 	 * this as a trylock. Otherwise lockdep will
2053 	 * complain.
2054 	 */
2055 	mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
2056 
2057 	return 1;
2058 }
2059 
2060 /*
2061  * Call the specified console driver, asking it to write out the specified
2062  * text and length. If @dropped_text is non-NULL and any records have been
2063  * dropped, a dropped message will be written out first.
2064  */
2065 static void call_console_driver(struct console *con, const char *text, size_t len,
2066 				char *dropped_text)
2067 {
2068 	size_t dropped_len;
2069 
2070 	if (con->dropped && dropped_text) {
2071 		dropped_len = snprintf(dropped_text, DROPPED_TEXT_MAX,
2072 				       "** %lu printk messages dropped **\n",
2073 				       con->dropped);
2074 		con->dropped = 0;
2075 		con->write(con, dropped_text, dropped_len);
2076 	}
2077 
2078 	con->write(con, text, len);
2079 }
2080 
2081 /*
2082  * Recursion is tracked separately on each CPU. If NMIs are supported, an
2083  * additional NMI context per CPU is also separately tracked. Until per-CPU
2084  * data is available, a separate "early tracking" is performed.
2085  */
2086 static DEFINE_PER_CPU(u8, printk_count);
2087 static u8 printk_count_early;
2088 #ifdef CONFIG_HAVE_NMI
2089 static DEFINE_PER_CPU(u8, printk_count_nmi);
2090 static u8 printk_count_nmi_early;
2091 #endif
2092 
2093 /*
2094  * Recursion is limited to keep the output sane. printk() should not require
2095  * more than 1 level of recursion (allowing, for example, printk() to trigger
2096  * a WARN), but a higher value is used in case some printk-internal errors
2097  * exist, such as the ringbuffer validation checks failing.
2098  */
2099 #define PRINTK_MAX_RECURSION 3
2100 
2101 /*
2102  * Return a pointer to the dedicated counter for the CPU+context of the
2103  * caller.
2104  */
2105 static u8 *__printk_recursion_counter(void)
2106 {
2107 #ifdef CONFIG_HAVE_NMI
2108 	if (in_nmi()) {
2109 		if (printk_percpu_data_ready())
2110 			return this_cpu_ptr(&printk_count_nmi);
2111 		return &printk_count_nmi_early;
2112 	}
2113 #endif
2114 	if (printk_percpu_data_ready())
2115 		return this_cpu_ptr(&printk_count);
2116 	return &printk_count_early;
2117 }
2118 
2119 /*
2120  * Enter recursion tracking. Interrupts are disabled to simplify tracking.
2121  * The caller must check the boolean return value to see if the recursion is
2122  * allowed. On failure, interrupts are not disabled.
2123  *
2124  * @recursion_ptr must be a variable of type (u8 *) and is the same variable
2125  * that is passed to printk_exit_irqrestore().
2126  */
2127 #define printk_enter_irqsave(recursion_ptr, flags)	\
2128 ({							\
2129 	bool success = true;				\
2130 							\
2131 	typecheck(u8 *, recursion_ptr);			\
2132 	local_irq_save(flags);				\
2133 	(recursion_ptr) = __printk_recursion_counter();	\
2134 	if (*(recursion_ptr) > PRINTK_MAX_RECURSION) {	\
2135 		local_irq_restore(flags);		\
2136 		success = false;			\
2137 	} else {					\
2138 		(*(recursion_ptr))++;			\
2139 	}						\
2140 	success;					\
2141 })
2142 
2143 /* Exit recursion tracking, restoring interrupts. */
2144 #define printk_exit_irqrestore(recursion_ptr, flags)	\
2145 	do {						\
2146 		typecheck(u8 *, recursion_ptr);		\
2147 		(*(recursion_ptr))--;			\
2148 		local_irq_restore(flags);		\
2149 	} while (0)
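
/*
 * Minimal usage sketch of the recursion-tracking pair (see vprintk_store()
 * below for the real caller):
 *
 *	u8 *recursion_ptr;
 *	unsigned long flags;
 *
 *	if (!printk_enter_irqsave(recursion_ptr, flags))
 *		return 0;	// recursion limit hit, drop the message
 *	// ... work that might recurse into printk() ...
 *	printk_exit_irqrestore(recursion_ptr, flags);
 */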
2150 
2151 int printk_delay_msec __read_mostly;
2152 
2153 static inline void printk_delay(int level)
2154 {
2155 	boot_delay_msec(level);
2156 
2157 	if (unlikely(printk_delay_msec)) {
2158 		int m = printk_delay_msec;
2159 
2160 		while (m--) {
2161 			mdelay(1);
2162 			touch_nmi_watchdog();
2163 		}
2164 	}
2165 }
2166 
2167 static inline u32 printk_caller_id(void)
2168 {
2169 	return in_task() ? task_pid_nr(current) :
2170 		0x80000000 + smp_processor_id();
2171 }
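
/*
 * The caller id therefore encodes either a PID (task context) or a CPU
 * number with the top bit set. For example, PID 1234 in task context
 * yields 1234, while a printk() from hard interrupt context on CPU 2
 * yields 0x80000002.
 */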
2172 
2173 /**
2174  * printk_parse_prefix - Parse level and control flags.
2175  *
2176  * @text:     The terminated text message.
2177  * @level:    A pointer to the current level value, will be updated.
2178  * @flags:    A pointer to the current printk_info flags, will be updated.
2179  *
2180  * @level may be NULL if the caller is not interested in the parsed value.
2181  * Otherwise the variable pointed to by @level must be set to
2182  * LOGLEVEL_DEFAULT in order to be updated with the parsed value.
2183  *
2184  * @flags may be NULL if the caller is not interested in the parsed value.
2185  * Otherwise the variable pointed to by @flags will be OR'd with the parsed
2186  * value.
2187  *
2188  * Return: The length of the parsed level and control flags.
2189  */
2190 u16 printk_parse_prefix(const char *text, int *level,
2191 			enum printk_info_flags *flags)
2192 {
2193 	u16 prefix_len = 0;
2194 	int kern_level;
2195 
2196 	while (*text) {
2197 		kern_level = printk_get_level(text);
2198 		if (!kern_level)
2199 			break;
2200 
2201 		switch (kern_level) {
2202 		case '0' ... '7':
2203 			if (level && *level == LOGLEVEL_DEFAULT)
2204 				*level = kern_level - '0';
2205 			break;
2206 		case 'c':	/* KERN_CONT */
2207 			if (flags)
2208 				*flags |= LOG_CONT;
2209 		}
2210 
2211 		prefix_len += 2;
2212 		text += 2;
2213 	}
2214 
2215 	return prefix_len;
2216 }
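
/*
 * Example (illustrative): for the text "<4>device timed out",
 * printk_get_level() sees the KERN_WARNING prefix, so printk_parse_prefix()
 * returns 2 and, if requested, sets *level to 4. A KERN_CONT prefix ("<c>")
 * leaves the level alone and ORs LOG_CONT into *flags. Multiple prefixes
 * are consumed, e.g. "<4><c>..." returns 4.
 */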
2217 
2218 __printf(5, 0)
2219 static u16 printk_sprint(char *text, u16 size, int facility,
2220 			 enum printk_info_flags *flags, const char *fmt,
2221 			 va_list args)
2222 {
2223 	u16 text_len;
2224 
2225 	text_len = vscnprintf(text, size, fmt, args);
2226 
2227 	/* Mark and strip a trailing newline. */
2228 	if (text_len && text[text_len - 1] == '\n') {
2229 		text_len--;
2230 		*flags |= LOG_NEWLINE;
2231 	}
2232 
2233 	/* Strip log level and control flags. */
2234 	if (facility == 0) {
2235 		u16 prefix_len;
2236 
2237 		prefix_len = printk_parse_prefix(text, NULL, NULL);
2238 		if (prefix_len) {
2239 			text_len -= prefix_len;
2240 			memmove(text, text + prefix_len, text_len);
2241 		}
2242 	}
2243 
2244 	trace_console_rcuidle(text, text_len);
2245 
2246 	return text_len;
2247 }
2248 
2249 __printf(4, 0)
2250 int vprintk_store(int facility, int level,
2251 		  const struct dev_printk_info *dev_info,
2252 		  const char *fmt, va_list args)
2253 {
2254 	struct prb_reserved_entry e;
2255 	enum printk_info_flags flags = 0;
2256 	struct printk_record r;
2257 	unsigned long irqflags;
2258 	u16 trunc_msg_len = 0;
2259 	char prefix_buf[8];
2260 	u8 *recursion_ptr;
2261 	u16 reserve_size;
2262 	va_list args2;
2263 	u32 caller_id;
2264 	u16 text_len;
2265 	int ret = 0;
2266 	u64 ts_nsec;
2267 
2268 	if (!printk_enter_irqsave(recursion_ptr, irqflags))
2269 		return 0;
2270 
2271 	/*
2272 	 * Since the duration of printk() can vary depending on the message
2273 	 * and state of the ringbuffer, grab the timestamp now so that it is
2274 	 * close to the call of printk(). This provides a more deterministic
2275 	 * timestamp with respect to the caller.
2276 	 */
2277 	ts_nsec = local_clock();
2278 
2279 	caller_id = printk_caller_id();
2280 
2281 	/*
2282 	 * The sprintf needs to come first since the syslog prefix might be
2283 	 * passed in as a parameter. An extra byte must be reserved so that
2284 	 * later the vscnprintf() into the reserved buffer has room for the
2285 	 * terminating '\0', which is not counted by vsnprintf().
2286 	 */
2287 	va_copy(args2, args);
2288 	reserve_size = vsnprintf(&prefix_buf[0], sizeof(prefix_buf), fmt, args2) + 1;
2289 	va_end(args2);
2290 
2291 	if (reserve_size > LOG_LINE_MAX)
2292 		reserve_size = LOG_LINE_MAX;
2293 
2294 	/* Extract log level or control flags. */
2295 	if (facility == 0)
2296 		printk_parse_prefix(&prefix_buf[0], &level, &flags);
2297 
2298 	if (level == LOGLEVEL_DEFAULT)
2299 		level = default_message_loglevel;
2300 
2301 	if (dev_info)
2302 		flags |= LOG_NEWLINE;
2303 
2304 	if (flags & LOG_CONT) {
2305 		prb_rec_init_wr(&r, reserve_size);
2306 		if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) {
2307 			text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
2308 						 facility, &flags, fmt, args);
2309 			r.info->text_len += text_len;
2310 
2311 			if (flags & LOG_NEWLINE) {
2312 				r.info->flags |= LOG_NEWLINE;
2313 				prb_final_commit(&e);
2314 			} else {
2315 				prb_commit(&e);
2316 			}
2317 
2318 			ret = text_len;
2319 			goto out;
2320 		}
2321 	}
2322 
2323 	/*
2324 	 * Explicitly initialize the record before every prb_reserve() call.
2325 	 * prb_reserve_in_last() and prb_reserve() purposely invalidate the
2326 	 * structure when they fail.
2327 	 */
2328 	prb_rec_init_wr(&r, reserve_size);
2329 	if (!prb_reserve(&e, prb, &r)) {
2330 		/* truncate the message if it is too long for empty buffer */
2331 		truncate_msg(&reserve_size, &trunc_msg_len);
2332 
2333 		prb_rec_init_wr(&r, reserve_size + trunc_msg_len);
2334 		if (!prb_reserve(&e, prb, &r))
2335 			goto out;
2336 	}
2337 
2338 	/* fill message */
2339 	text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args);
2340 	if (trunc_msg_len)
2341 		memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len);
2342 	r.info->text_len = text_len + trunc_msg_len;
2343 	r.info->facility = facility;
2344 	r.info->level = level & 7;
2345 	r.info->flags = flags & 0x1f;
2346 	r.info->ts_nsec = ts_nsec;
2347 	r.info->caller_id = caller_id;
2348 	if (dev_info)
2349 		memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));
2350 
2351 	/* A message without a trailing newline can be continued. */
2352 	if (!(flags & LOG_NEWLINE))
2353 		prb_commit(&e);
2354 	else
2355 		prb_final_commit(&e);
2356 
2357 	ret = text_len + trunc_msg_len;
2358 out:
2359 	printk_exit_irqrestore(recursion_ptr, irqflags);
2360 	return ret;
2361 }
2362 
2363 asmlinkage int vprintk_emit(int facility, int level,
2364 			    const struct dev_printk_info *dev_info,
2365 			    const char *fmt, va_list args)
2366 {
2367 	int printed_len;
2368 	bool in_sched = false;
2369 
2370 	/* Suppress unimportant messages after panic happens */
2371 	if (unlikely(suppress_printk))
2372 		return 0;
2373 
2374 	if (unlikely(suppress_panic_printk) &&
2375 	    atomic_read(&panic_cpu) != raw_smp_processor_id())
2376 		return 0;
2377 
2378 	if (level == LOGLEVEL_SCHED) {
2379 		level = LOGLEVEL_DEFAULT;
2380 		in_sched = true;
2381 	}
2382 
2383 	printk_delay(level);
2384 
2385 	printed_len = vprintk_store(facility, level, dev_info, fmt, args);
2386 
2387 	/* If called from the scheduler, we can not call up(). */
2388 	if (!in_sched && allow_direct_printing()) {
2389 		/*
2390 		 * The caller may be holding system-critical or
2391 		 * timing-sensitive locks. Disable preemption during direct
2392 		 * printing of all remaining records to all consoles so that
2393 		 * this context can return as soon as possible. Hopefully
2394 		 * another printk() caller will take over the printing.
2395 		 */
2396 		preempt_disable();
2397 		/*
2398 		 * Try to acquire and then immediately release the console
2399 		 * semaphore. The release will print out buffers. With the
2400 		 * spinning variant, this context tries to take over the
2401 		 * printing from another printing context.
2402 		 */
2403 		if (console_trylock_spinning())
2404 			console_unlock();
2405 		preempt_enable();
2406 	}
2407 
2408 	wake_up_klogd();
2409 	return printed_len;
2410 }
2411 EXPORT_SYMBOL(vprintk_emit);
2412 
2413 int vprintk_default(const char *fmt, va_list args)
2414 {
2415 	return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
2416 }
2417 EXPORT_SYMBOL_GPL(vprintk_default);
2418 
2419 asmlinkage __visible int _printk(const char *fmt, ...)
2420 {
2421 	va_list args;
2422 	int r;
2423 
2424 	va_start(args, fmt);
2425 	r = vprintk(fmt, args);
2426 	va_end(args);
2427 
2428 	return r;
2429 }
2430 EXPORT_SYMBOL(_printk);
2431 
2432 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress);
2433 
2434 static void printk_start_kthread(struct console *con);
2435 
2436 #else /* CONFIG_PRINTK */
2437 
2438 #define CONSOLE_LOG_MAX		0
2439 #define DROPPED_TEXT_MAX	0
2440 #define printk_time		false
2441 
2442 #define prb_read_valid(rb, seq, r)	false
2443 #define prb_first_valid_seq(rb)		0
2444 #define prb_next_seq(rb)		0
2445 
2446 static u64 syslog_seq;
2447 
2448 static size_t record_print_text(const struct printk_record *r,
2449 				bool syslog, bool time)
2450 {
2451 	return 0;
2452 }
2453 static ssize_t info_print_ext_header(char *buf, size_t size,
2454 				     struct printk_info *info)
2455 {
2456 	return 0;
2457 }
2458 static ssize_t msg_print_ext_body(char *buf, size_t size,
2459 				  char *text, size_t text_len,
2460 				  struct dev_printk_info *dev_info) { return 0; }
2461 static void console_lock_spinning_enable(void) { }
2462 static int console_lock_spinning_disable_and_check(void) { return 0; }
2463 static void call_console_driver(struct console *con, const char *text, size_t len,
2464 				char *dropped_text)
2465 {
2466 }
2467 static bool suppress_message_printing(int level) { return false; }
2468 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; }
2469 static void printk_start_kthread(struct console *con) { }
2470 static bool allow_direct_printing(void) { return true; }
2471 
2472 #endif /* CONFIG_PRINTK */
2473 
2474 #ifdef CONFIG_EARLY_PRINTK
2475 struct console *early_console;
2476 
2477 asmlinkage __visible void early_printk(const char *fmt, ...)
2478 {
2479 	va_list ap;
2480 	char buf[512];
2481 	int n;
2482 
2483 	if (!early_console)
2484 		return;
2485 
2486 	va_start(ap, fmt);
2487 	n = vscnprintf(buf, sizeof(buf), fmt, ap);
2488 	va_end(ap);
2489 
2490 	early_console->write(early_console, buf, n);
2491 }
2492 #endif
2493 
2494 static void set_user_specified(struct console_cmdline *c, bool user_specified)
2495 {
2496 	if (!user_specified)
2497 		return;
2498 
2499 	/*
2500 	 * @c console was defined by the user on the command line.
2501 	 * Do not clear it when the console is added again by SPCR or the device tree.
2502 	 */
2503 	c->user_specified = true;
2504 	/* At least one console defined by the user on the command line. */
2505 	console_set_on_cmdline = 1;
2506 }
2507 
2508 static int __add_preferred_console(char *name, int idx, char *options,
2509 				   char *brl_options, bool user_specified)
2510 {
2511 	struct console_cmdline *c;
2512 	int i;
2513 
2514 	/*
2515 	 *	See if this tty is not yet registered, and
2516 	 *	if we have a slot free.
2517 	 */
2518 	for (i = 0, c = console_cmdline;
2519 	     i < MAX_CMDLINECONSOLES && c->name[0];
2520 	     i++, c++) {
2521 		if (strcmp(c->name, name) == 0 && c->index == idx) {
2522 			if (!brl_options)
2523 				preferred_console = i;
2524 			set_user_specified(c, user_specified);
2525 			return 0;
2526 		}
2527 	}
2528 	if (i == MAX_CMDLINECONSOLES)
2529 		return -E2BIG;
2530 	if (!brl_options)
2531 		preferred_console = i;
2532 	strlcpy(c->name, name, sizeof(c->name));
2533 	c->options = options;
2534 	set_user_specified(c, user_specified);
2535 	braille_set_options(c, brl_options);
2536 
2537 	c->index = idx;
2538 	return 0;
2539 }
2540 
2541 static int __init console_msg_format_setup(char *str)
2542 {
2543 	if (!strcmp(str, "syslog"))
2544 		console_msg_format = MSG_FORMAT_SYSLOG;
2545 	if (!strcmp(str, "default"))
2546 		console_msg_format = MSG_FORMAT_DEFAULT;
2547 	return 1;
2548 }
2549 __setup("console_msg_format=", console_msg_format_setup);
2550 
2551 /*
2552  * Set up a console.  Called via do_early_param() in init/main.c
2553  * for each "console=" parameter in the boot command line.
2554  */
2555 static int __init console_setup(char *str)
2556 {
2557 	char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */
2558 	char *s, *options, *brl_options = NULL;
2559 	int idx;
2560 
2561 	/*
2562 	 * console="" or console=null have been suggested as a way to
2563 	 * disable console output. Use ttynull that has been created
2564 	 * for exactly this purpose.
2565 	 */
2566 	if (str[0] == 0 || strcmp(str, "null") == 0) {
2567 		__add_preferred_console("ttynull", 0, NULL, NULL, true);
2568 		return 1;
2569 	}
2570 
2571 	if (_braille_console_setup(&str, &brl_options))
2572 		return 1;
2573 
2574 	/*
2575 	 * Decode str into name, index, options.
2576 	 */
2577 	if (str[0] >= '0' && str[0] <= '9') {
2578 		strcpy(buf, "ttyS");
2579 		strncpy(buf + 4, str, sizeof(buf) - 5);
2580 	} else {
2581 		strncpy(buf, str, sizeof(buf) - 1);
2582 	}
2583 	buf[sizeof(buf) - 1] = 0;
2584 	options = strchr(str, ',');
2585 	if (options)
2586 		*(options++) = 0;
2587 #ifdef __sparc__
2588 	if (!strcmp(str, "ttya"))
2589 		strcpy(buf, "ttyS0");
2590 	if (!strcmp(str, "ttyb"))
2591 		strcpy(buf, "ttyS1");
2592 #endif
2593 	for (s = buf; *s; s++)
2594 		if (isdigit(*s) || *s == ',')
2595 			break;
2596 	idx = simple_strtoul(s, NULL, 10);
2597 	*s = 0;
2598 
2599 	__add_preferred_console(buf, idx, options, brl_options, true);
2600 	return 1;
2601 }
2602 __setup("console=", console_setup);
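
/*
 * Decoding examples (illustrative): "console=ttyS0,115200n8" yields
 * name "ttyS", idx 0 and options "115200n8"; a bare "console=1" is
 * shorthand for "ttyS1"; "console=null" registers the ttynull console
 * to silence output. Each invocation appends to console_cmdline[] via
 * __add_preferred_console().
 */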
2603 
2604 /**
2605  * add_preferred_console - add a device to the list of preferred consoles.
2606  * @name: device name
2607  * @idx: device index
2608  * @options: options for this console
2609  *
2610  * The last preferred console added will be used for kernel messages
2611  * and stdin/out/err for init.  Normally this is used by console_setup
2612  * above to handle user-supplied console arguments; however it can also
2613  * be used by arch-specific code either to override the user or more
2614  * commonly to provide a default console (i.e. from PROM variables) when
2615  * the user has not supplied one.
2616  */
2617 int add_preferred_console(char *name, int idx, char *options)
2618 {
2619 	return __add_preferred_console(name, idx, options, NULL, false);
2620 }
2621 
2622 bool console_suspend_enabled = true;
2623 EXPORT_SYMBOL(console_suspend_enabled);
2624 
2625 static int __init console_suspend_disable(char *str)
2626 {
2627 	console_suspend_enabled = false;
2628 	return 1;
2629 }
2630 __setup("no_console_suspend", console_suspend_disable);
2631 module_param_named(console_suspend, console_suspend_enabled,
2632 		bool, S_IRUGO | S_IWUSR);
2633 MODULE_PARM_DESC(console_suspend, "suspend console during suspend"
2634 	" and hibernate operations");
2635 
2636 static bool printk_console_no_auto_verbose;
2637 
2638 void console_verbose(void)
2639 {
2640 	if (console_loglevel && !printk_console_no_auto_verbose)
2641 		console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
2642 }
2643 EXPORT_SYMBOL_GPL(console_verbose);
2644 
2645 module_param_named(console_no_auto_verbose, printk_console_no_auto_verbose, bool, 0644);
2646 MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to highest on oops/panic/etc");
2647 
2648 /**
2649  * suspend_console - suspend the console subsystem
2650  *
2651  * This disables printk() while we go into suspend states
2652  */
2653 void suspend_console(void)
2654 {
2655 	if (!console_suspend_enabled)
2656 		return;
2657 	pr_info("Suspending console(s) (use no_console_suspend to debug)\n");
2658 	pr_flush(1000, true);
2659 	console_lock();
2660 	console_suspended = 1;
2661 	up_console_sem();
2662 }
2663 
2664 void resume_console(void)
2665 {
2666 	if (!console_suspend_enabled)
2667 		return;
2668 	down_console_sem();
2669 	console_suspended = 0;
2670 	console_unlock();
2671 	pr_flush(1000, true);
2672 }
2673 
2674 /**
2675  * console_cpu_notify - print deferred console messages after CPU hotplug
2676  * @cpu: unused
2677  *
2678  * If printk() is called from a CPU that is not online yet, the messages
2679  * will be printed on the console only if there are CON_ANYTIME consoles.
2680  * This function is called when a new CPU comes online (or fails to come
2681  * up) or goes offline.
2682  */
2683 static int console_cpu_notify(unsigned int cpu)
2684 {
2685 	if (!cpuhp_tasks_frozen) {
2686 		/* If trylock fails, someone else is doing the printing */
2687 		if (console_trylock())
2688 			console_unlock();
2689 		else {
2690 			/*
2691 			 * If a new CPU comes online, the conditions for
2692 			 * printer_should_wake() may have changed for some
2693 			 * kthread printer with !CON_ANYTIME.
2694 			 */
2695 			wake_up_klogd();
2696 		}
2697 	}
2698 	return 0;
2699 }
2700 
2701 /**
2702  * console_lock - lock the console system for exclusive use.
2703  *
2704  * Acquires a lock which guarantees that the caller has
2705  * exclusive access to the console system and the console_drivers list.
2706  *
2707  * Can sleep, returns nothing.
2708  */
2709 void console_lock(void)
2710 {
2711 	might_sleep();
2712 
2713 	down_console_sem();
2714 	if (console_suspended)
2715 		return;
2716 	console_kthreads_block();
2717 	console_may_schedule = 1;
2718 }
2719 EXPORT_SYMBOL(console_lock);
2720 
2721 /**
2722  * console_trylock - try to lock the console system for exclusive use.
2723  *
2724  * Try to acquire a lock which guarantees that the caller has exclusive
2725  * access to the console system and the console_drivers list.
2726  *
2727  * returns 1 on success, and 0 on failure to acquire the lock.
2728  */
2729 int console_trylock(void)
2730 {
2731 	if (down_trylock_console_sem())
2732 		return 0;
2733 	if (console_suspended) {
2734 		up_console_sem();
2735 		return 0;
2736 	}
2737 	if (!console_kthreads_atomic_tryblock()) {
2738 		up_console_sem();
2739 		return 0;
2740 	}
2741 	console_may_schedule = 0;
2742 	return 1;
2743 }
2744 EXPORT_SYMBOL(console_trylock);
2745 
2746 /*
2747  * This is used to help to make sure that certain paths within the VT code are
2748  * running with the console lock held. It is definitely not the perfect debug
2749  * tool (it is not known if the VT code is the task holding the console lock),
2750  * but it helps tracking those weird code paths in the console code such as
2751  * when the console is suspended: where the console is not locked but no
2752  * console printing may occur.
2753  *
2754  * Note: This returns true when the console is suspended but is not locked.
2755  *       This is intentional because the VT code must consider that situation
2756  *       the same as if the console was locked.
2757  */
2758 int is_console_locked(void)
2759 {
2760 	return (console_kthreads_blocked || atomic_read(&console_kthreads_active));
2761 }
2762 EXPORT_SYMBOL(is_console_locked);
2763 
2764 /*
2765  * Return true when this CPU should unlock console_sem without pushing all
2766  * messages to the console. This reduces the chance that the console is
2767  * locked when the panic CPU tries to use it.
2768  */
2769 static bool abandon_console_lock_in_panic(void)
2770 {
2771 	if (!panic_in_progress())
2772 		return false;
2773 
2774 	/*
2775 	 * We can use raw_smp_processor_id() here because it is impossible for
2776 	 * the task to be migrated to the panic_cpu, or away from it. If
2777 	 * panic_cpu has already been set, and we're not currently executing on
2778 	 * that CPU, then we never will be.
2779 	 */
2780 	return atomic_read(&panic_cpu) != raw_smp_processor_id();
2781 }
2782 
2783 static inline bool __console_is_usable(short flags)
2784 {
2785 	if (!(flags & CON_ENABLED))
2786 		return false;
2787 
2788 	/*
2789 	 * Console drivers may assume that per-cpu resources have been
2790 	 * allocated. So unless they're explicitly marked as being able to
2791 	 * cope (CON_ANYTIME) don't call them until this CPU is officially up.
2792 	 */
2793 	if (!cpu_online(raw_smp_processor_id()) &&
2794 	    !(flags & CON_ANYTIME))
2795 		return false;
2796 
2797 	return true;
2798 }
2799 
2800 /*
2801  * Check if the given console is currently capable and allowed to print
2802  * records.
2803  *
2804  * Requires holding the console_lock.
2805  */
2806 static inline bool console_is_usable(struct console *con)
2807 {
2808 	if (!con->write)
2809 		return false;
2810 
2811 	return __console_is_usable(con->flags);
2812 }
2813 
2814 static void __console_unlock(void)
2815 {
2816 	/*
2817 	 * Depending on whether console_lock() or console_trylock() was used,
2818 	 * appropriately allow the kthread printers to continue.
2819 	 */
2820 	if (console_kthreads_blocked)
2821 		console_kthreads_unblock();
2822 	else
2823 		console_kthreads_atomic_unblock();
2824 
2825 	/*
2826 	 * New records may have arrived while the console was locked.
2827 	 * Wake the kthread printers to print them.
2828 	 */
2829 	wake_up_klogd();
2830 
2831 	up_console_sem();
2832 }
2833 
2834 /*
2835  * Print one record for the given console. The record printed is whatever
2836  * record is the next available record for the given console.
2837  *
2838  * @text is a buffer of size CONSOLE_LOG_MAX.
2839  *
2840  * If extended messages should be printed, @ext_text is a buffer of size
2841  * CONSOLE_EXT_LOG_MAX. Otherwise @ext_text must be NULL.
2842  *
2843  * If dropped messages should be printed, @dropped_text is a buffer of size
2844  * DROPPED_TEXT_MAX. Otherwise @dropped_text must be NULL.
2845  *
2846  * @handover will be set to true if a printk waiter has taken over the
2847  * console_lock, in which case the caller is no longer holding the
2848  * console_lock. Otherwise it is set to false. A NULL pointer may be provided
2849  * to disable allowing the console_lock to be taken over by a printk waiter.
2850  *
2851  * Returns false if the given console has no next record to print, otherwise
2852  * true.
2853  *
2854  * Requires the console_lock if @handover is non-NULL.
2855  * Requires con->lock otherwise.
2856  */
2857 static bool __console_emit_next_record(struct console *con, char *text, char *ext_text,
2858 				       char *dropped_text, bool *handover)
2859 {
2860 	static atomic_t panic_console_dropped = ATOMIC_INIT(0);
2861 	struct printk_info info;
2862 	struct printk_record r;
2863 	unsigned long flags;
2864 	char *write_text;
2865 	size_t len;
2866 
2867 	prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
2868 
2869 	if (handover)
2870 		*handover = false;
2871 
2872 	if (!prb_read_valid(prb, con->seq, &r))
2873 		return false;
2874 
2875 	if (con->seq != r.info->seq) {
2876 		con->dropped += r.info->seq - con->seq;
2877 		con->seq = r.info->seq;
2878 		if (panic_in_progress() &&
2879 		    atomic_fetch_inc_relaxed(&panic_console_dropped) > 10) {
2880 			suppress_panic_printk = 1;
2881 			pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n");
2882 		}
2883 	}
2884 
2885 	/* Skip record that has level above the console loglevel. */
2886 	if (suppress_message_printing(r.info->level)) {
2887 		con->seq++;
2888 		goto skip;
2889 	}
2890 
2891 	if (ext_text) {
2892 		write_text = ext_text;
2893 		len = info_print_ext_header(ext_text, CONSOLE_EXT_LOG_MAX, r.info);
2894 		len += msg_print_ext_body(ext_text + len, CONSOLE_EXT_LOG_MAX - len,
2895 					  &r.text_buf[0], r.info->text_len, &r.info->dev_info);
2896 	} else {
2897 		write_text = text;
2898 		len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
2899 	}
2900 
2901 	if (handover) {
2902 		/*
2903 		 * While actively printing out messages, if another printk()
2904 		 * were to occur on another CPU, it may wait for this one to
2905 		 * finish. This task can not be preempted if there is a
2906 		 * waiter waiting to take over.
2907 		 *
2908 		 * Interrupts are disabled because the hand over to a waiter
2909 		 * must not be interrupted until the hand over is completed
2910 		 * (@console_waiter is cleared).
2911 		 */
2912 		printk_safe_enter_irqsave(flags);
2913 		console_lock_spinning_enable();
2914 
2915 		/* don't trace irqsoff print latency */
2916 		stop_critical_timings();
2917 	}
2918 
2919 	call_console_driver(con, write_text, len, dropped_text);
2920 
2921 	con->seq++;
2922 
2923 	if (handover) {
2924 		start_critical_timings();
2925 		*handover = console_lock_spinning_disable_and_check();
2926 		printk_safe_exit_irqrestore(flags);
2927 	}
2928 skip:
2929 	return true;
2930 }
2931 
2932 /*
2933  * Print a record for a given console, but allow another printk() caller to
2934  * take over the console_lock and continue printing.
2935  *
2936  * Requires the console_lock, but depending on @handover after the call, the
2937  * caller may no longer have the console_lock.
2938  *
2939  * See __console_emit_next_record() for argument and return details.
2940  */
2941 static bool console_emit_next_record_transferable(struct console *con, char *text, char *ext_text,
2942 						  char *dropped_text, bool *handover)
2943 {
2944 	/*
2945 	 * Handovers are only supported if threaded printers are atomically
2946 	 * blocked. The context taking over the console_lock may be atomic.
2947 	 */
2948 	if (!console_kthreads_atomically_blocked()) {
2949 		*handover = false;
2950 		handover = NULL;
2951 	}
2952 
2953 	return __console_emit_next_record(con, text, ext_text, dropped_text, handover);
2954 }
2955 
2956 /*
2957  * Print out all remaining records to all consoles.
2958  *
2959  * @do_cond_resched is set by the caller. It can be true only in schedulable
2960  * context.
2961  *
2962  * @next_seq is set to the sequence number after the last available record.
2963  * The value is valid only when this function returns true. It means that all
2964  * usable consoles are completely flushed.
2965  *
2966  * @handover will be set to true if a printk waiter has taken over the
2967  * console_lock, in which case the caller is no longer holding the
2968  * console_lock. Otherwise it is set to false.
2969  *
2970  * Returns true when there was at least one usable console and all messages
2971  * were flushed to all usable consoles. A returned false informs the caller
2972  * that everything was not flushed (either there were no usable consoles or
2973  * another context has taken over printing or it is a panic situation and this
2974  * is not the panic CPU or direct printing is not preferred). Regardless of the
2975  * reason, the caller should assume it is not useful to immediately try again.
2976  *
2977  * Requires the console_lock.
2978  */
2979 static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
2980 {
2981 	static char dropped_text[DROPPED_TEXT_MAX];
2982 	static char ext_text[CONSOLE_EXT_LOG_MAX];
2983 	static char text[CONSOLE_LOG_MAX];
2984 	bool any_usable = false;
2985 	struct console *con;
2986 	bool any_progress;
2987 
2988 	*next_seq = 0;
2989 	*handover = false;
2990 
2991 	do {
2992 		/* Let the kthread printers do the work if they can. */
2993 		if (!allow_direct_printing())
2994 			return false;
2995 
2996 		any_progress = false;
2997 
2998 		for_each_console(con) {
2999 			bool progress;
3000 
3001 			if (!console_is_usable(con))
3002 				continue;
3003 			any_usable = true;
3004 
3005 			if (con->flags & CON_EXTENDED) {
3006 				/* Extended consoles do not print "dropped messages". */
3007 				progress = console_emit_next_record_transferable(con, &text[0],
3008 								&ext_text[0], NULL, handover);
3009 			} else {
3010 				progress = console_emit_next_record_transferable(con, &text[0],
3011 								NULL, &dropped_text[0], handover);
3012 			}
3013 			if (*handover)
3014 				return false;
3015 
3016 			/* Track the next of the highest seq flushed. */
3017 			if (con->seq > *next_seq)
3018 				*next_seq = con->seq;
3019 
3020 			if (!progress)
3021 				continue;
3022 			any_progress = true;
3023 
3024 			/* Allow panic_cpu to take over the consoles safely. */
3025 			if (abandon_console_lock_in_panic())
3026 				return false;
3027 
3028 			if (do_cond_resched)
3029 				cond_resched();
3030 		}
3031 	} while (any_progress);
3032 
3033 	return any_usable;
3034 }
3035 
3036 /**
3037  * console_unlock - unlock the console system
3038  *
3039  * Releases the console_lock which the caller holds on the console system
3040  * and the console driver list.
3041  *
3042  * While the console_lock was held, console output may have been buffered
3043  * by printk().  If this is the case, console_unlock() emits
3044  * the output prior to releasing the lock.
3045  *
3046  * console_unlock() may be called from any context.
3047  */
3048 void console_unlock(void)
3049 {
3050 	bool do_cond_resched;
3051 	bool handover;
3052 	bool flushed;
3053 	u64 next_seq;
3054 
3055 	if (console_suspended) {
3056 		up_console_sem();
3057 		return;
3058 	}
3059 
3060 	/*
3061 	 * Console drivers are called with interrupts disabled, so
3062 	 * @console_may_schedule should be cleared before; however, we may
3063 	 * end up dumping a lot of lines, for example, if called from
3064 	 * console registration path, and should invoke cond_resched()
3065 	 * between lines if allowable.  Not doing so can cause a very long
3066 	 * scheduling stall on a slow console leading to RCU stall and
3067 	 * softlockup warnings which exacerbate the issue with more
3068 	 * messages practically incapacitating the system. Therefore, create
3069 	 * a local to use for the printing loop.
3070 	 */
3071 	do_cond_resched = console_may_schedule;
3072 
3073 	do {
3074 		console_may_schedule = 0;
3075 
3076 		flushed = console_flush_all(do_cond_resched, &next_seq, &handover);
3077 		if (!handover)
3078 			__console_unlock();
3079 
3080 		/*
3081 		 * Abort if there was a failure to flush all messages to all
3082 		 * usable consoles. Either it is not possible to flush (in
3083 		 * which case it would be an infinite loop of retrying) or
3084 		 * another context has taken over printing.
3085 		 */
3086 		if (!flushed)
3087 			break;
3088 
3089 		/*
3090 		 * Some context may have added new records after
3091 		 * console_flush_all() but before unlocking the console.
3092 		 * Re-check if there is a new record to flush. If the trylock
3093 		 * fails, another context is already handling the printing.
3094 		 */
3095 	} while (prb_read_valid(prb, next_seq, NULL) && console_trylock());
3096 }
3097 EXPORT_SYMBOL(console_unlock);
3098 
3099 /**
3100  * console_conditional_schedule - yield the CPU if required
3101  *
3102  * If the console code is currently allowed to sleep, and
3103  * if this CPU should yield the CPU to another task, do
3104  * so here.
3105  *
3106  * Must be called with the console_lock held.
3107  */
3108 void __sched console_conditional_schedule(void)
3109 {
3110 	if (console_may_schedule)
3111 		cond_resched();
3112 }
3113 EXPORT_SYMBOL(console_conditional_schedule);
3114 
3115 void console_unblank(void)
3116 {
3117 	struct console *c;
3118 
3119 	/*
3120 	 * console_unblank can no longer be called in interrupt context unless
3121 	 * oops_in_progress is set to 1..
3122 	 */
3123 	if (oops_in_progress) {
3124 		if (down_trylock_console_sem() != 0)
3125 			return;
3126 		if (!console_kthreads_atomic_tryblock()) {
3127 			up_console_sem();
3128 			return;
3129 		}
3130 	} else
3131 		console_lock();
3132 
3133 	console_may_schedule = 0;
3134 	for_each_console(c)
3135 		if ((c->flags & CON_ENABLED) && c->unblank)
3136 			c->unblank();
3137 	console_unlock();
3138 
3139 	if (!oops_in_progress)
3140 		pr_flush(1000, true);
3141 }
3142 
3143 /**
3144  * console_flush_on_panic - flush console content on panic
3145  * @mode: flush all messages in buffer or just the pending ones
3146  *
3147  * Immediately output all pending messages no matter what.
3148  */
3149 void console_flush_on_panic(enum con_flush_mode mode)
3150 {
3151 	/*
3152 	 * If someone else is holding the console lock, trylock will fail
3153 	 * and may_schedule may be set.  Ignore and proceed to unlock so
3154 	 * that messages are flushed out.  As this can be called from any
3155 	 * context and we don't want to get preempted while flushing,
3156 	 * ensure may_schedule is cleared.
3157 	 */
3158 	console_trylock();
3159 	console_may_schedule = 0;
3160 
3161 	if (mode == CONSOLE_REPLAY_ALL) {
3162 		struct console *c;
3163 		u64 seq;
3164 
3165 		seq = prb_first_valid_seq(prb);
3166 		for_each_console(c)
3167 			c->seq = seq;
3168 	}
3169 	console_unlock();
3170 }
3171 
3172 /*
3173  * Return the console tty driver structure and its associated index
3174  */
3175 struct tty_driver *console_device(int *index)
3176 {
3177 	struct console *c;
3178 	struct tty_driver *driver = NULL;
3179 
3180 	console_lock();
3181 	for_each_console(c) {
3182 		if (!c->device)
3183 			continue;
3184 		driver = c->device(c, index);
3185 		if (driver)
3186 			break;
3187 	}
3188 	console_unlock();
3189 	return driver;
3190 }
3191 
3192 /*
3193  * Prevent further output on the passed console device so that (for example)
3194  * serial drivers can disable console output before suspending a port, and can
3195  * re-enable output afterwards.
3196  */
3197 void console_stop(struct console *console)
3198 {
3199 	__pr_flush(console, 1000, true);
3200 	console_lock();
3201 	console->flags &= ~CON_ENABLED;
3202 	console_unlock();
3203 }
3204 EXPORT_SYMBOL(console_stop);
3205 
3206 void console_start(struct console *console)
3207 {
3208 	console_lock();
3209 	console->flags |= CON_ENABLED;
3210 	console_unlock();
3211 	__pr_flush(console, 1000, true);
3212 }
3213 EXPORT_SYMBOL(console_start);
3214 
3215 static int __read_mostly keep_bootcon;
3216 
3217 static int __init keep_bootcon_setup(char *str)
3218 {
3219 	keep_bootcon = 1;
3220 	pr_info("debug: skip boot console de-registration.\n");
3221 
3222 	return 0;
3223 }
3224 
3225 early_param("keep_bootcon", keep_bootcon_setup);
3226 
3227 /*
3228  * This is called by register_console() to try to match
3229  * the newly registered console with any of the ones selected
3230  * by either the command line or add_preferred_console() and
3231  * setup/enable it.
3232  *
3233  * Care needs to be taken with consoles that are statically
3234  * enabled, such as netconsole.
3235  */
3236 static int try_enable_preferred_console(struct console *newcon,
3237 					bool user_specified)
3238 {
3239 	struct console_cmdline *c;
3240 	int i, err;
3241 
3242 	for (i = 0, c = console_cmdline;
3243 	     i < MAX_CMDLINECONSOLES && c->name[0];
3244 	     i++, c++) {
3245 		if (c->user_specified != user_specified)
3246 			continue;
3247 		if (!newcon->match ||
3248 		    newcon->match(newcon, c->name, c->index, c->options) != 0) {
3249 			/* default matching */
3250 			BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name));
3251 			if (strcmp(c->name, newcon->name) != 0)
3252 				continue;
3253 			if (newcon->index >= 0 &&
3254 			    newcon->index != c->index)
3255 				continue;
3256 			if (newcon->index < 0)
3257 				newcon->index = c->index;
3258 
3259 			if (_braille_register_console(newcon, c))
3260 				return 0;
3261 
3262 			if (newcon->setup &&
3263 			    (err = newcon->setup(newcon, c->options)) != 0)
3264 				return err;
3265 		}
3266 		newcon->flags |= CON_ENABLED;
3267 		if (i == preferred_console)
3268 			newcon->flags |= CON_CONSDEV;
3269 		return 0;
3270 	}
3271 
3272 	/*
3273 	 * Some consoles, such as pstore and netconsole, can be enabled even
3274 	 * without matching. Accept the pre-enabled consoles only when match()
3275 	 * and setup() had a chance to be called.
3276 	 */
3277 	if (newcon->flags & CON_ENABLED && c->user_specified == user_specified)
3278 		return 0;
3279 
3280 	return -ENOENT;
3281 }
3282 
3283 /* Try to enable the console unconditionally */
3284 static void try_enable_default_console(struct console *newcon)
3285 {
3286 	if (newcon->index < 0)
3287 		newcon->index = 0;
3288 
3289 	if (newcon->setup && newcon->setup(newcon, NULL) != 0)
3290 		return;
3291 
3292 	newcon->flags |= CON_ENABLED;
3293 
3294 	if (newcon->device)
3295 		newcon->flags |= CON_CONSDEV;
3296 }
3297 
3298 #define con_printk(lvl, con, fmt, ...)			\
3299 	printk(lvl pr_fmt("%sconsole [%s%d] " fmt),	\
3300 	       (con->flags & CON_BOOT) ? "boot" : "",	\
3301 	       con->name, con->index, ##__VA_ARGS__)
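
/*
 * Example (illustrative): con_printk(KERN_INFO, newcon, "enabled\n")
 * expands to a printk() line such as "printk: console [ttyS0] enabled",
 * with "boot" prepended to "console" when CON_BOOT is set.
 */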
3302 
3303 /*
3304  * The console driver calls this routine during kernel initialization
3305  * to register the console printing procedure with printk() and to
3306  * print any messages that were printed by the kernel before the
3307  * console driver was initialized.
3308  *
3309  * This can happen pretty early during the boot process (because of
3310  * early_printk) - sometimes before setup_arch() completes - be careful
3311  * of what kernel features are used - they may not be initialised yet.
3312  *
3313  * There are two types of consoles - bootconsoles (early_printk) and
3314  * "real" consoles (everything which is not a bootconsole) which are
3315  * handled differently.
3316  *  - Any number of bootconsoles can be registered at any time.
3317  *  - As soon as a "real" console is registered, all bootconsoles
3318  *    will be unregistered automatically.
3319  *  - Once a "real" console is registered, any attempt to register a
3320  *    bootconsole will be rejected.
3321  */
3322 void register_console(struct console *newcon)
3323 {
3324 	struct console *con;
3325 	bool bootcon_enabled = false;
3326 	bool realcon_enabled = false;
3327 	int err;
3328 
3329 	for_each_console(con) {
3330 		if (WARN(con == newcon, "console '%s%d' already registered\n",
3331 					 con->name, con->index))
3332 			return;
3333 	}
3334 
3335 	for_each_console(con) {
3336 		if (con->flags & CON_BOOT)
3337 			bootcon_enabled = true;
3338 		else
3339 			realcon_enabled = true;
3340 	}
3341 
3342 	/* Do not register boot consoles when there already is a real one. */
3343 	if (newcon->flags & CON_BOOT && realcon_enabled) {
3344 		pr_info("Too late to register bootconsole %s%d\n",
3345 			newcon->name, newcon->index);
3346 		return;
3347 	}
3348 
3349 	/*
3350 	 * See if we want to enable this console driver by default.
3351 	 *
3352 	 * Nope when a console is preferred by the command line, device
3353 	 * tree, or SPCR.
3354 	 *
3355 	 * The first real console with tty binding (driver) wins. More
3356 	 * consoles might get enabled before the right one is found.
3357 	 *
3358 	 * Note that a console with tty binding will have CON_CONSDEV
3359 	 * flag set and will be first in the list.
3360 	 */
3361 	if (preferred_console < 0) {
3362 		if (!console_drivers || !console_drivers->device ||
3363 		    console_drivers->flags & CON_BOOT) {
3364 			try_enable_default_console(newcon);
3365 		}
3366 	}
3367 
3368 	/* See if this console matches one we selected on the command line */
3369 	err = try_enable_preferred_console(newcon, true);
3370 
3371 	/* If not, try to match against the platform default(s) */
3372 	if (err == -ENOENT)
3373 		err = try_enable_preferred_console(newcon, false);
3374 
3375 	/* printk() messages are not printed to the Braille console. */
3376 	if (err || newcon->flags & CON_BRL)
3377 		return;
3378 
3379 	/*
3380 	 * If we have a bootconsole, and are switching to a real console,
3381 	 * don't print everything out again, since when the boot console and
3382 	 * the real console are the same physical device, it's annoying to
3383 	 * see the beginning boot messages twice.
3384 	 */
3385 	if (bootcon_enabled &&
3386 	    ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) {
3387 		newcon->flags &= ~CON_PRINTBUFFER;
3388 	}
3389 
3390 	/*
3391 	 *	Put this console in the list - keep the
3392 	 *	preferred driver at the head of the list.
3393 	 */
3394 	console_lock();
3395 	if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) {
3396 		newcon->next = console_drivers;
3397 		console_drivers = newcon;
3398 		if (newcon->next)
3399 			newcon->next->flags &= ~CON_CONSDEV;
3400 		/* Ensure this flag is always set for the head of the list */
3401 		newcon->flags |= CON_CONSDEV;
3402 	} else {
3403 		newcon->next = console_drivers->next;
3404 		console_drivers->next = newcon;
3405 	}
3406 
3407 	if (newcon->flags & CON_EXTENDED)
3408 		nr_ext_console_drivers++;
3409 
3410 	newcon->dropped = 0;
3411 	newcon->thread = NULL;
3412 	newcon->blocked = true;
3413 	mutex_init(&newcon->lock);
3414 
3415 	if (newcon->flags & CON_PRINTBUFFER) {
3416 		/* Get a consistent copy of @syslog_seq. */
3417 		mutex_lock(&syslog_lock);
3418 		newcon->seq = syslog_seq;
3419 		mutex_unlock(&syslog_lock);
3420 	} else {
3421 		/* Begin with next message. */
3422 		newcon->seq = prb_next_seq(prb);
3423 	}
3424 
3425 	if (printk_kthreads_available)
3426 		printk_start_kthread(newcon);
3427 
3428 	console_unlock();
3429 	console_sysfs_notify();
3430 
3431 	/*
3432 	 * By unregistering the bootconsoles after we enable the real console
3433 	 * we get the "console xxx enabled" message on all the consoles -
3434 	 * boot consoles, real consoles, etc - this is to ensure that end
3435 	 * users know there might be something in the kernel's log buffer that
3436 	 * went to the bootconsole (that they do not see on the real console)
3437 	 */
3438 	con_printk(KERN_INFO, newcon, "enabled\n");
3439 	if (bootcon_enabled &&
3440 	    ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) &&
3441 	    !keep_bootcon) {
3442 		/* We need to iterate through all boot consoles, to make
3443 		 * sure we print everything out, before we unregister them.
3444 		 */
3445 		for_each_console(con)
3446 			if (con->flags & CON_BOOT)
3447 				unregister_console(con);
3448 	}
3449 }
3450 EXPORT_SYMBOL(register_console);
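
/*
 * Minimal registration sketch (illustrative only; a real driver lives in
 * its own file and usually also implements ->device() for tty binding):
 *
 *	static void my_console_write(struct console *con, const char *s,
 *				     unsigned int n)
 *	{
 *		// push n bytes of s to the hardware
 *	}
 *
 *	static struct console my_console = {
 *		.name	= "mycon",
 *		.write	= my_console_write,
 *		.flags	= CON_PRINTBUFFER,
 *		.index	= -1,
 *	};
 *
 *	register_console(&my_console);
 */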
3451 
3452 int unregister_console(struct console *console)
3453 {
3454 	struct task_struct *thd;
3455 	struct console *con;
3456 	int res;
3457 
3458 	con_printk(KERN_INFO, console, "disabled\n");
3459 
3460 	res = _braille_unregister_console(console);
3461 	if (res < 0)
3462 		return res;
3463 	if (res > 0)
3464 		return 0;
3465 
3466 	res = -ENODEV;
3467 	console_lock();
3468 	if (console_drivers == console) {
3469 		console_drivers = console->next;
3470 		res = 0;
3471 	} else {
3472 		for_each_console(con) {
3473 			if (con->next == console) {
3474 				con->next = console->next;
3475 				res = 0;
3476 				break;
3477 			}
3478 		}
3479 	}
3480 
3481 	if (res)
3482 		goto out_disable_unlock;
3483 
3484 	if (console->flags & CON_EXTENDED)
3485 		nr_ext_console_drivers--;
3486 
3487 	/*
3488 	 * If this isn't the last console and it has CON_CONSDEV set, we
3489 	 * need to set it on the next preferred console.
3490 	 */
3491 	if (console_drivers != NULL && console->flags & CON_CONSDEV)
3492 		console_drivers->flags |= CON_CONSDEV;
3493 
3494 	console->flags &= ~CON_ENABLED;
3495 
3496 	/*
3497 	 * console->thread can only be cleared under the console lock. But
3498 	 * stopping the thread must be done without the console lock. The
3499 	 * task that clears @thread is the task that stops the kthread.
3500 	 */
3501 	thd = console->thread;
3502 	console->thread = NULL;
3503 
3504 	console_unlock();
3505 
3506 	if (thd)
3507 		kthread_stop(thd);
3508 
3509 	console_sysfs_notify();
3510 
3511 	if (console->exit)
3512 		res = console->exit(console);
3513 
3514 	return res;
3515 
3516 out_disable_unlock:
3517 	console->flags &= ~CON_ENABLED;
3518 	console_unlock();
3519 
3520 	return res;
3521 }
3522 EXPORT_SYMBOL(unregister_console);
3523 
3524 /*
3525  * Initialize the console device. This is called *early*, so
3526  * we can't necessarily depend on lots of kernel help here.
3527  * Just do some early initializations, and do the complex setup
3528  * later.
3529  */
3530 void __init console_init(void)
3531 {
3532 	int ret;
3533 	initcall_t call;
3534 	initcall_entry_t *ce;
3535 
3536 	/* Setup the default TTY line discipline. */
3537 	n_tty_init();
3538 
3539 	/*
3540 	 * set up the console device so that later boot sequences can
3541 	 * inform about problems etc..
3542 	 */
3543 	ce = __con_initcall_start;
3544 	trace_initcall_level("console");
3545 	while (ce < __con_initcall_end) {
3546 		call = initcall_from_entry(ce);
3547 		trace_initcall_start(call);
3548 		ret = call();
3549 		trace_initcall_finish(call, ret);
3550 		ce++;
3551 	}
3552 }
3553 
3554 /*
3555  * Some boot consoles access data that is in the init section and which will
3556  * be discarded after the initcalls have been run. To make sure that no code
3557  * will access this data, unregister the boot consoles in a late initcall.
3558  *
3559  * If for some reason, such as deferred probe or the driver being a loadable
3560  * module, the real console hasn't registered yet at this point, there will
3561  * be a brief interval in which no messages are logged to the console, which
3562  * makes it difficult to diagnose problems that occur during this time.
3563  *
3564  * To mitigate this problem somewhat, only unregister consoles whose memory
3565  * intersects with the init section. Note that all other boot consoles will
3566  * get unregistered when the real preferred console is registered.
3567  */
3568 static int __init printk_late_init(void)
3569 {
3570 	struct console *con;
3571 	int ret;
3572 
3573 	for_each_console(con) {
3574 		if (!(con->flags & CON_BOOT))
3575 			continue;
3576 
3577 		/* Check addresses that might be used for enabled consoles. */
3578 		if (init_section_intersects(con, sizeof(*con)) ||
3579 		    init_section_contains(con->write, 0) ||
3580 		    init_section_contains(con->read, 0) ||
3581 		    init_section_contains(con->device, 0) ||
3582 		    init_section_contains(con->unblank, 0) ||
3583 		    init_section_contains(con->data, 0)) {
3584 			/*
3585 			 * Please, consider moving the reported consoles out
3586 			 * of the init section.
3587 			 */
3588 			pr_warn("bootconsole [%s%d] uses init memory and must be disabled even before the real one is ready\n",
3589 				con->name, con->index);
3590 			unregister_console(con);
3591 		}
3592 	}
3593 	ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL,
3594 					console_cpu_notify);
3595 	WARN_ON(ret < 0);
3596 	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online",
3597 					console_cpu_notify, NULL);
3598 	WARN_ON(ret < 0);
3599 	printk_sysctl_init();
3600 	return 0;
3601 }
3602 late_initcall(printk_late_init);
3603 
3604 static int __init printk_activate_kthreads(void)
3605 {
3606 	struct console *con;
3607 
3608 	console_lock();
3609 	printk_kthreads_available = true;
3610 	for_each_console(con)
3611 		printk_start_kthread(con);
3612 	console_unlock();
3613 
3614 	return 0;
3615 }
3616 early_initcall(printk_activate_kthreads);
3617 
3618 #if defined CONFIG_PRINTK
3619 /* If @con is specified, only wait for that console. Otherwise wait for all. */
3620 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress)
3621 {
3622 	int remaining = timeout_ms;
3623 	struct console *c;
3624 	u64 last_diff = 0;
3625 	u64 printk_seq;
3626 	u64 diff;
3627 	u64 seq;
3628 
3629 	might_sleep();
3630 
3631 	seq = prb_next_seq(prb);
3632 
3633 	for (;;) {
3634 		diff = 0;
3635 
3636 		console_lock();
3637 		for_each_console(c) {
3638 			if (con && con != c)
3639 				continue;
3640 			if (!console_is_usable(c))
3641 				continue;
3642 			printk_seq = c->seq;
3643 			if (printk_seq < seq)
3644 				diff += seq - printk_seq;
3645 		}
3646 		console_unlock();
3647 
3648 		if (diff != last_diff && reset_on_progress)
3649 			remaining = timeout_ms;
3650 
3651 		if (diff == 0 || remaining == 0)
3652 			break;
3653 
3654 		if (remaining < 0) {
3655 			/* no timeout limit */
3656 			msleep(100);
3657 		} else if (remaining < 100) {
3658 			msleep(remaining);
3659 			remaining = 0;
3660 		} else {
3661 			msleep(100);
3662 			remaining -= 100;
3663 		}
3664 
3665 		last_diff = diff;
3666 	}
3667 
3668 	return (diff == 0);
3669 }
3670 
3671 /**
3672  * pr_flush() - Wait for printing threads to catch up.
3673  *
3674  * @timeout_ms:        The maximum time (in ms) to wait.
3675  * @reset_on_progress: Reset the timeout if forward progress is seen.
3676  *
3677  * A value of 0 for @timeout_ms means no waiting will occur. A value of -1
3678  * represents infinite waiting.
3679  *
3680  * If @reset_on_progress is true, the timeout will be reset whenever any
3681  * printer has been seen to make some forward progress.
3682  *
3683  * Context: Process context. May sleep while acquiring console lock.
3684  * Return: true if all enabled printers are caught up.
3685  */
3686 bool pr_flush(int timeout_ms, bool reset_on_progress)
3687 {
3688 	return __pr_flush(NULL, timeout_ms, reset_on_progress);
3689 }
3690 EXPORT_SYMBOL(pr_flush);
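
/*
 * Illustrative sketch (not an in-tree caller): a path that wants pending
 * console output to drain before continuing, e.g. just before restarting
 * the machine, could wait up to one second and reset the timeout whenever
 * a console makes progress:
 *
 *	if (!pr_flush(1000, true))
 *		pr_info("some console output is still pending\n");
 *
 * A @timeout_ms of -1 waits without limit; 0 just reports the current
 * "caught up" state.
 */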
3691 
3692 static void __printk_fallback_preferred_direct(void)
3693 {
3694 	printk_prefer_direct_enter();
3695 	pr_err("falling back to preferred direct printing\n");
3696 	printk_kthreads_available = false;
3697 }
3698 
3699 /*
3700  * Enter preferred direct printing, but never exit. Mark console threads as
3701  * unavailable. The system is then forever in preferred direct printing and
3702  * any printing threads will exit.
3703  *
3704  * Must *not* be called under console_lock. Use
3705  * __printk_fallback_preferred_direct() if already holding console_lock.
3706  */
3707 static void printk_fallback_preferred_direct(void)
3708 {
3709 	console_lock();
3710 	__printk_fallback_preferred_direct();
3711 	console_unlock();
3712 }
3713 
3714 /*
3715  * Print a record for a given console, not allowing another printk() caller
3716  * to take over. This is appropriate for contexts that do not have the
3717  * console_lock.
3718  *
3719  * See __console_emit_next_record() for argument and return details.
3720  */
3721 static bool console_emit_next_record(struct console *con, char *text, char *ext_text,
3722 				     char *dropped_text)
3723 {
3724 	return __console_emit_next_record(con, text, ext_text, dropped_text, NULL);
3725 }
3726 
3727 static bool printer_should_wake(struct console *con, u64 seq)
3728 {
3729 	short flags;
3730 
3731 	if (kthread_should_stop() || !printk_kthreads_available)
3732 		return true;
3733 
3734 	if (con->blocked ||
3735 	    console_kthreads_atomically_blocked() ||
3736 	    block_console_kthreads ||
3737 	    system_state > SYSTEM_RUNNING ||
3738 	    oops_in_progress) {
3739 		return false;
3740 	}
3741 
3742 	/*
3743 	 * This is an unsafe read from con->flags, but a false positive is
3744 	 * not a problem. Worst case it would allow the printer to wake up
3745 	 * although it is disabled. But the printer will notice that when
3746 	 * attempting to print and instead go back to sleep.
3747 	 */
3748 	flags = data_race(READ_ONCE(con->flags));
3749 
3750 	if (!__console_is_usable(flags))
3751 		return false;
3752 
3753 	return prb_read_valid(prb, seq, NULL);
3754 }
3755 
3756 static int printk_kthread_func(void *data)
3757 {
3758 	struct console *con = data;
3759 	char *dropped_text = NULL;
3760 	char *ext_text = NULL;
3761 	u64 seq = 0;
3762 	char *text;
3763 	int error;
3764 
3765 	text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
3766 	if (!text) {
3767 		con_printk(KERN_ERR, con, "failed to allocate text buffer\n");
3768 		printk_fallback_preferred_direct();
3769 		goto out;
3770 	}
3771 
3772 	if (con->flags & CON_EXTENDED) {
3773 		ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
3774 		if (!ext_text) {
3775 			con_printk(KERN_ERR, con, "failed to allocate ext_text buffer\n");
3776 			printk_fallback_preferred_direct();
3777 			goto out;
3778 		}
3779 	} else {
3780 		dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL);
3781 		if (!dropped_text) {
3782 			con_printk(KERN_ERR, con, "failed to allocate dropped_text buffer\n");
3783 			printk_fallback_preferred_direct();
3784 			goto out;
3785 		}
3786 	}
3787 
3788 	con_printk(KERN_INFO, con, "printing thread started\n");
3789 
3790 	for (;;) {
3791 		/*
3792 		 * Guarantee this task is visible on the waitqueue before
3793 		 * checking the wake condition.
3794 		 *
3795 		 * The full memory barrier within set_current_state() of
3796 		 * prepare_to_wait_event() pairs with the full memory barrier
3797 		 * within wq_has_sleeper().
3798 		 *
3799 		 * This pairs with __wake_up_klogd:A.
3800 		 */
3801 		error = wait_event_interruptible(log_wait,
3802 				printer_should_wake(con, seq)); /* LMM(printk_kthread_func:A) */
3803 
3804 		if (kthread_should_stop() || !printk_kthreads_available)
3805 			break;
3806 
3807 		if (error)
3808 			continue;
3809 
3810 		error = mutex_lock_interruptible(&con->lock);
3811 		if (error)
3812 			continue;
3813 
3814 		if (con->blocked ||
3815 		    !console_kthread_printing_tryenter()) {
3816 			/* Another context has locked the console_lock. */
3817 			mutex_unlock(&con->lock);
3818 			continue;
3819 		}
3820 
3821 		/*
3822 		 * Although this context has not locked the console_lock, it
3823 		 * is known that the console_lock is not locked and it is not
3824 		 * possible for any other context to lock the console_lock.
3825 		 * Therefore it is safe to read con->flags.
3826 		 */
3827 
3828 		if (!__console_is_usable(con->flags)) {
3829 			console_kthread_printing_exit();
3830 			mutex_unlock(&con->lock);
3831 			continue;
3832 		}
3833 
3834 		/*
3835 		 * Even though the printk kthread is always preemptible, it is
3836 		 * still not allowed to call cond_resched() from within
3837 		 * console drivers. The task may become non-preemptible in the
3838 		 * console driver call chain. For example, vt_console_print()
3839 		 * takes a spinlock and then can call into fbcon_redraw(),
3840 		 * which can conditionally invoke cond_resched().
3841 		 */
3842 		console_may_schedule = 0;
3843 		console_emit_next_record(con, text, ext_text, dropped_text);
3844 
3845 		seq = con->seq;
3846 
3847 		console_kthread_printing_exit();
3848 
3849 		mutex_unlock(&con->lock);
3850 	}
3851 
3852 	con_printk(KERN_INFO, con, "printing thread stopped\n");
3853 out:
3854 	kfree(dropped_text);
3855 	kfree(ext_text);
3856 	kfree(text);
3857 
3858 	console_lock();
3859 	/*
3860 	 * If this kthread is being stopped by another task, con->thread will
3861 	 * already be NULL. That is fine. The important thing is that it is
3862 	 * NULL after the kthread exits.
3863 	 */
3864 	con->thread = NULL;
3865 	console_unlock();
3866 
3867 	return 0;
3868 }
3869 
3870 /* Must be called under console_lock. */
3871 static void printk_start_kthread(struct console *con)
3872 {
3873 	/*
3874 	 * Do not start a kthread if there is no write() callback. The
3875 	 * kthreads assume the write() callback exists.
3876 	 */
3877 	if (!con->write)
3878 		return;
3879 
3880 	con->thread = kthread_run(printk_kthread_func, con,
3881 				  "pr/%s%d", con->name, con->index);
3882 	if (IS_ERR(con->thread)) {
3883 		con->thread = NULL;
3884 		con_printk(KERN_ERR, con, "unable to start printing thread\n");
3885 		__printk_fallback_preferred_direct();
3886 		return;
3887 	}
3888 }
3889 
3890 /*
3891  * Delayed printk version, for scheduler-internal messages:
3892  */
3893 #define PRINTK_PENDING_WAKEUP		0x01
3894 #define PRINTK_PENDING_DIRECT_OUTPUT	0x02
3895 
3896 static DEFINE_PER_CPU(int, printk_pending);
3897 
3898 static void wake_up_klogd_work_func(struct irq_work *irq_work)
3899 {
3900 	int pending = this_cpu_xchg(printk_pending, 0);
3901 
3902 	if (pending & PRINTK_PENDING_DIRECT_OUTPUT) {
3903 		printk_prefer_direct_enter();
3904 
3905 		/* If trylock fails, someone else is doing the printing */
3906 		if (console_trylock())
3907 			console_unlock();
3908 
3909 		printk_prefer_direct_exit();
3910 	}
3911 
3912 	if (pending & PRINTK_PENDING_WAKEUP)
3913 		wake_up_interruptible(&log_wait);
3914 }
3915 
3916 static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
3917 	IRQ_WORK_INIT_LAZY(wake_up_klogd_work_func);
3918 
3919 static void __wake_up_klogd(int val)
3920 {
3921 	if (!printk_percpu_data_ready())
3922 		return;
3923 
3924 	preempt_disable();
3925 	/*
3926 	 * Guarantee any new records can be seen by tasks preparing to wait
3927 	 * before this context checks if the wait queue is empty.
3928 	 *
3929 	 * The full memory barrier within wq_has_sleeper() pairs with the full
3930 	 * memory barrier within set_current_state() of
3931 	 * prepare_to_wait_event(), which is called after ___wait_event() adds
3932 	 * the waiter but before it has checked the wait condition.
3933 	 *
3934 	 * This pairs with devkmsg_read:A, syslog_print:A, and
3935 	 * printk_kthread_func:A.
3936 	 */
3937 	if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */
3938 	    (val & PRINTK_PENDING_DIRECT_OUTPUT)) {
3939 		this_cpu_or(printk_pending, val);
3940 		irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
3941 	}
3942 	preempt_enable();
3943 }
3944 
3945 void wake_up_klogd(void)
3946 {
3947 	__wake_up_klogd(PRINTK_PENDING_WAKEUP);
3948 }
3949 
3950 void defer_console_output(void)
3951 {
3952 	/*
3953 	 * New messages may have been added directly to the ringbuffer
3954 	 * using vprintk_store(), so wake any waiters as well.
3955 	 */
3956 	int val = PRINTK_PENDING_WAKEUP;
3957 
3958 	/*
3959 	 * Make sure that some context will print the messages when direct
3960 	 * printing is allowed. Direct printing is allowed in situations where
3961 	 * the kthreads may not be reliable or may even be unusable.
3962 	 */
3963 	if (allow_direct_printing())
3964 		val |= PRINTK_PENDING_DIRECT_OUTPUT;
3965 
3966 	__wake_up_klogd(val);
3967 }
3968 
3969 void printk_trigger_flush(void)
3970 {
3971 	defer_console_output();
3972 }
3973 
3974 int vprintk_deferred(const char *fmt, va_list args)
3975 {
3976 	int r;
3977 
3978 	r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
3979 	defer_console_output();
3980 
3981 	return r;
3982 }
3983 
3984 int _printk_deferred(const char *fmt, ...)
3985 {
3986 	va_list args;
3987 	int r;
3988 
3989 	va_start(args, fmt);
3990 	r = vprintk_deferred(fmt, args);
3991 	va_end(args);
3992 
3993 	return r;
3994 }
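
/*
 * Illustrative sketch (hypothetical caller): code holding a scheduler or
 * timekeeping lock, where calling into console drivers could recurse or
 * deadlock, stores the message now and lets the deferred machinery above
 * trigger the console output later:
 *
 *	printk_deferred(KERN_WARNING "runqueue imbalance on CPU %d\n", cpu);
 *
 * printk_deferred() is the printk.h wrapper around _printk_deferred() and
 * is reserved for such core-kernel call sites.
 */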
3995 
3996 /*
3997  * printk rate limiting, lifted from the networking subsystem.
3998  *
3999  * This enforces a rate limit: not more than 10 kernel messages
4000  * every 5s, to limit the damage a printk flood (denial of service) can do.
4001  */
4002 DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
4003 
4004 int __printk_ratelimit(const char *func)
4005 {
4006 	return ___ratelimit(&printk_ratelimit_state, func);
4007 }
4008 EXPORT_SYMBOL(__printk_ratelimit);
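
/*
 * Illustrative usage sketch: callers normally use the printk.h wrapper
 * printk_ratelimit(), which passes __func__ for the "callbacks suppressed"
 * report:
 *
 *	if (printk_ratelimit())
 *		pr_warn("dropping malformed request\n");
 *
 * Note that this shares one global ratelimit state; printk_ratelimited()
 * with its per-call-site state is usually preferred.
 */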
4009 
4010 /**
4011  * printk_timed_ratelimit - caller-controlled printk ratelimiting
4012  * @caller_jiffies: pointer to caller's state
4013  * @interval_msecs: minimum interval between prints
4014  *
4015  * printk_timed_ratelimit() returns true if more than @interval_msecs
4016  * milliseconds have elapsed since the last time printk_timed_ratelimit()
4017  * returned true.
4018  */
4019 bool printk_timed_ratelimit(unsigned long *caller_jiffies,
4020 			unsigned int interval_msecs)
4021 {
4022 	unsigned long elapsed = jiffies - *caller_jiffies;
4023 
4024 	if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs))
4025 		return false;
4026 
4027 	*caller_jiffies = jiffies;
4028 	return true;
4029 }
4030 EXPORT_SYMBOL(printk_timed_ratelimit);
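
/*
 * Illustrative usage sketch ("last_warned" is a hypothetical caller-owned
 * variable): emit at most one warning every five seconds for this call
 * site:
 *
 *	static unsigned long last_warned;
 *
 *	if (printk_timed_ratelimit(&last_warned, 5000))
 *		pr_warn("FIFO overrun\n");
 *
 * Because the caller owns the state word, independent call sites are
 * rate-limited independently of each other.
 */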
4031 
4032 static DEFINE_SPINLOCK(dump_list_lock);
4033 static LIST_HEAD(dump_list);
4034 
4035 /**
4036  * kmsg_dump_register - register a kernel log dumper.
4037  * @dumper: pointer to the kmsg_dumper structure
4038  *
4039  * Adds a kernel log dumper to the system. The dump callback in the
4040  * structure must be set; it will be called when the kernel oopses or
4041  * panics. Returns zero on success and %-EINVAL or %-EBUSY otherwise.
4042  */
4043 int kmsg_dump_register(struct kmsg_dumper *dumper)
4044 {
4045 	unsigned long flags;
4046 	int err = -EBUSY;
4047 
4048 	/* The dump callback needs to be set */
4049 	if (!dumper->dump)
4050 		return -EINVAL;
4051 
4052 	spin_lock_irqsave(&dump_list_lock, flags);
4053 	/* Don't allow registering multiple times */
4054 	if (!dumper->registered) {
4055 		dumper->registered = 1;
4056 		list_add_tail_rcu(&dumper->list, &dump_list);
4057 		err = 0;
4058 	}
4059 	spin_unlock_irqrestore(&dump_list_lock, flags);
4060 
4061 	return err;
4062 }
4063 EXPORT_SYMBOL_GPL(kmsg_dump_register);
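
/*
 * Illustrative registration sketch (my_oops_dump/my_dumper are
 * hypothetical): the dump() callback is mandatory; max_reason is optional
 * and here raised so the dumper also runs on clean shutdowns, whereas
 * leaving it KMSG_DUMP_UNDEF defaults to oops/panic only (see kmsg_dump()):
 *
 *	static void my_oops_dump(struct kmsg_dumper *dumper,
 *				 enum kmsg_dump_reason reason);
 *
 *	static struct kmsg_dumper my_dumper = {
 *		.dump		= my_oops_dump,
 *		.max_reason	= KMSG_DUMP_SHUTDOWN,
 *	};
 *
 *	err = kmsg_dump_register(&my_dumper);
 */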
4064 
4065 /**
4066  * kmsg_dump_unregister - unregister a kmsg dumper.
4067  * @dumper: pointer to the kmsg_dumper structure
4068  *
4069  * Removes a dump device from the system. Returns zero on success and
4070  * %-EINVAL otherwise.
4071  */
4072 int kmsg_dump_unregister(struct kmsg_dumper *dumper)
4073 {
4074 	unsigned long flags;
4075 	int err = -EINVAL;
4076 
4077 	spin_lock_irqsave(&dump_list_lock, flags);
4078 	if (dumper->registered) {
4079 		dumper->registered = 0;
4080 		list_del_rcu(&dumper->list);
4081 		err = 0;
4082 	}
4083 	spin_unlock_irqrestore(&dump_list_lock, flags);
4084 	synchronize_rcu();
4085 
4086 	return err;
4087 }
4088 EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
4089 
4090 static bool always_kmsg_dump;
4091 module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
4092 
4093 const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason)
4094 {
4095 	switch (reason) {
4096 	case KMSG_DUMP_PANIC:
4097 		return "Panic";
4098 	case KMSG_DUMP_OOPS:
4099 		return "Oops";
4100 	case KMSG_DUMP_EMERG:
4101 		return "Emergency";
4102 	case KMSG_DUMP_SHUTDOWN:
4103 		return "Shutdown";
4104 	default:
4105 		return "Unknown";
4106 	}
4107 }
4108 EXPORT_SYMBOL_GPL(kmsg_dump_reason_str);
4109 
4110 /**
4111  * kmsg_dump - dump kernel log to kernel message dumpers.
4112  * @reason: the reason (oops, panic etc) for dumping
4113  *
4114  * Call each of the registered dumper's dump() callback, which can
4115  * retrieve the kmsg records with kmsg_dump_get_line() or
4116  * kmsg_dump_get_buffer().
4117  */
4118 void kmsg_dump(enum kmsg_dump_reason reason)
4119 {
4120 	struct kmsg_dumper *dumper;
4121 
4122 	rcu_read_lock();
4123 	list_for_each_entry_rcu(dumper, &dump_list, list) {
4124 		enum kmsg_dump_reason max_reason = dumper->max_reason;
4125 
4126 		/*
4127 		 * If the client has not provided a specific max_reason, default
4128 		 * to KMSG_DUMP_OOPS, unless always_kmsg_dump was set.
4129 		 */
4130 		if (max_reason == KMSG_DUMP_UNDEF) {
4131 			max_reason = always_kmsg_dump ? KMSG_DUMP_MAX :
4132 							KMSG_DUMP_OOPS;
4133 		}
4134 		if (reason > max_reason)
4135 			continue;
4136 
4137 		/* invoke dumper which will iterate over records */
4138 		dumper->dump(dumper, reason);
4139 	}
4140 	rcu_read_unlock();
4141 }
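
/*
 * For example, the reboot paths issue kmsg_dump(KMSG_DUMP_SHUTDOWN): with
 * the defaulting above, a dumper that left max_reason at KMSG_DUMP_UNDEF
 * (and with always_kmsg_dump unset) is skipped for that reason, while oops
 * and panic dumps still reach it.
 */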
4142 
4143 /**
4144  * kmsg_dump_get_line - retrieve one kmsg log line
4145  * @iter: kmsg dump iterator
4146  * @syslog: include the "<4>" prefixes
4147  * @line: buffer to copy the line to
4148  * @size: maximum size of the buffer
4149  * @len: length of line placed into buffer
4150  *
4151  * Start at the beginning of the kmsg buffer, with the oldest kmsg
4152  * record, and copy one record into the provided buffer.
4153  *
4154  * Consecutive calls will return the next available record moving
4155  * towards the end of the buffer with the youngest messages.
4156  *
4157  * A return value of FALSE indicates that there are no more records to
4158  * read.
4159  */
4160 bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog,
4161 			char *line, size_t size, size_t *len)
4162 {
4163 	u64 min_seq = latched_seq_read_nolock(&clear_seq);
4164 	struct printk_info info;
4165 	unsigned int line_count;
4166 	struct printk_record r;
4167 	size_t l = 0;
4168 	bool ret = false;
4169 
4170 	if (iter->cur_seq < min_seq)
4171 		iter->cur_seq = min_seq;
4172 
4173 	prb_rec_init_rd(&r, &info, line, size);
4174 
4175 	/* Read text or count text lines? */
4176 	if (line) {
4177 		if (!prb_read_valid(prb, iter->cur_seq, &r))
4178 			goto out;
4179 		l = record_print_text(&r, syslog, printk_time);
4180 	} else {
4181 		if (!prb_read_valid_info(prb, iter->cur_seq,
4182 					 &info, &line_count)) {
4183 			goto out;
4184 		}
4185 		l = get_record_print_text_size(&info, line_count, syslog,
4186 					       printk_time);
4187 
4188 	}
4189 
4190 	iter->cur_seq = r.info->seq + 1;
4191 	ret = true;
4192 out:
4193 	if (len)
4194 		*len = l;
4195 	return ret;
4196 }
4197 EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
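
/*
 * Illustrative dump() callback sketch (my_emit_line() is a hypothetical
 * sink): walk the records one formatted line at a time, oldest first:
 *
 *	static void my_oops_dump(struct kmsg_dumper *dumper,
 *				 enum kmsg_dump_reason reason)
 *	{
 *		static char line[1024];
 *		struct kmsg_dump_iter iter;
 *		size_t len;
 *
 *		kmsg_dump_rewind(&iter);
 *		while (kmsg_dump_get_line(&iter, false, line, sizeof(line), &len))
 *			my_emit_line(line, len);
 *	}
 */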
4198 
4199 /**
4200  * kmsg_dump_get_buffer - copy kmsg log lines
4201  * @iter: kmsg dump iterator
4202  * @syslog: include the "<4>" prefixes
4203  * @buf: buffer to copy the lines to
4204  * @size: maximum size of the buffer
4205  * @len_out: length of the text copied into the buffer
4206  *
4207  * Start at the end of the kmsg buffer and fill the provided buffer
4208  * with as many of the *youngest* kmsg records as will fit into it.
4209  * If the buffer is large enough, all available kmsg records will be
4210  * copied with a single call.
4211  *
4212  * Consecutive calls will fill the buffer with the next block of
4213  * available older records, not including the earlier retrieved ones.
4214  *
4215  * A return value of FALSE indicates that there are no more records to
4216  * read.
4217  */
4218 bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog,
4219 			  char *buf, size_t size, size_t *len_out)
4220 {
4221 	u64 min_seq = latched_seq_read_nolock(&clear_seq);
4222 	struct printk_info info;
4223 	struct printk_record r;
4224 	u64 seq;
4225 	u64 next_seq;
4226 	size_t len = 0;
4227 	bool ret = false;
4228 	bool time = printk_time;
4229 
4230 	if (!buf || !size)
4231 		goto out;
4232 
4233 	if (iter->cur_seq < min_seq)
4234 		iter->cur_seq = min_seq;
4235 
4236 	if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) {
4237 		if (info.seq != iter->cur_seq) {
4238 			/* messages are gone, move to first available one */
4239 			iter->cur_seq = info.seq;
4240 		}
4241 	}
4242 
4243 	/* last entry */
4244 	if (iter->cur_seq >= iter->next_seq)
4245 		goto out;
4246 
4247 	/*
4248 	 * Find first record that fits, including all following records,
4249 	 * into the user-provided buffer for this dump. Pass in size-1
4250 	 * because this function (by way of record_print_text()) will
4251 	 * not write more than size-1 bytes of text into @buf.
4252 	 */
4253 	seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq,
4254 				     size - 1, syslog, time);
4255 
4256 	/*
4257 	 * Next kmsg_dump_get_buffer() invocation will dump block of
4258 	 * older records stored right before this one.
4259 	 */
4260 	next_seq = seq;
4261 
4262 	prb_rec_init_rd(&r, &info, buf, size);
4263 
4264 	len = 0;
4265 	prb_for_each_record(seq, prb, seq, &r) {
4266 		if (r.info->seq >= iter->next_seq)
4267 			break;
4268 
4269 		len += record_print_text(&r, syslog, time);
4270 
4271 		/* Adjust record to store to remaining buffer space. */
4272 		prb_rec_init_rd(&r, &info, buf + len, size - len);
4273 	}
4274 
4275 	iter->next_seq = next_seq;
4276 	ret = true;
4277 out:
4278 	if (len_out)
4279 		*len_out = len;
4280 	return ret;
4281 }
4282 EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
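
/*
 * Illustrative sketch of the buffer variant (buf, bufsize and
 * my_write_chunk() are hypothetical): fill fixed-size chunks, youngest
 * records first, with older blocks returned by the following calls:
 *
 *	struct kmsg_dump_iter iter;
 *	size_t len;
 *
 *	kmsg_dump_rewind(&iter);
 *	while (kmsg_dump_get_buffer(&iter, true, buf, bufsize, &len))
 *		my_write_chunk(buf, len);
 *
 * This pattern suits backends that store the log in record- or zone-sized
 * pieces rather than line by line.
 */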
4283 
4284 /**
4285  * kmsg_dump_rewind - reset the iterator
4286  * @iter: kmsg dump iterator
4287  *
4288  * Reset the dumper's iterator so that kmsg_dump_get_line() and
4289  * kmsg_dump_get_buffer() can be called again and used multiple
4290  * times within the same dumper.dump() callback.
4291  */
4292 void kmsg_dump_rewind(struct kmsg_dump_iter *iter)
4293 {
4294 	iter->cur_seq = latched_seq_read_nolock(&clear_seq);
4295 	iter->next_seq = prb_next_seq(prb);
4296 }
4297 EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
4298 
4299 #endif
4300 
4301 #ifdef CONFIG_SMP
4302 static atomic_t printk_cpu_sync_owner = ATOMIC_INIT(-1);
4303 static atomic_t printk_cpu_sync_nested = ATOMIC_INIT(0);
4304 
4305 /**
4306  * __printk_cpu_sync_wait() - Busy wait until the printk cpu-reentrant
4307  *                            spinning lock is not owned by any CPU.
4308  *
4309  * Context: Any context.
4310  */
4311 void __printk_cpu_sync_wait(void)
4312 {
4313 	do {
4314 		cpu_relax();
4315 	} while (atomic_read(&printk_cpu_sync_owner) != -1);
4316 }
4317 EXPORT_SYMBOL(__printk_cpu_sync_wait);
4318 
4319 /**
4320  * __printk_cpu_sync_try_get() - Try to acquire the printk cpu-reentrant
4321  *                               spinning lock.
4322  *
4323  * If no processor has the lock, the calling processor takes the lock and
4324  * becomes the owner. If the calling processor is already the owner of the
4325  * lock, this function succeeds immediately.
4326  *
4327  * Context: Any context. Expects interrupts to be disabled.
4328  * Return: 1 on success, otherwise 0.
4329  */
4330 int __printk_cpu_sync_try_get(void)
4331 {
4332 	int cpu;
4333 	int old;
4334 
4335 	cpu = smp_processor_id();
4336 
4337 	/*
4338 	 * Guarantee loads and stores from this CPU when it is the lock owner
4339 	 * are _not_ visible to the previous lock owner. This pairs with
4340 	 * __printk_cpu_sync_put:B.
4341 	 *
4342 	 * Memory barrier involvement:
4343 	 *
4344 	 * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B,
4345 	 * then __printk_cpu_sync_put:A can never read from
4346 	 * __printk_cpu_sync_try_get:B.
4347 	 *
4348 	 * Relies on:
4349 	 *
4350 	 * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B
4351 	 * of the previous CPU
4352 	 *    matching
4353 	 * ACQUIRE from __printk_cpu_sync_try_get:A to
4354 	 * __printk_cpu_sync_try_get:B of this CPU
4355 	 */
4356 	old = atomic_cmpxchg_acquire(&printk_cpu_sync_owner, -1,
4357 				     cpu); /* LMM(__printk_cpu_sync_try_get:A) */
4358 	if (old == -1) {
4359 		/*
4360 		 * This CPU is now the owner and begins loading/storing
4361 		 * data: LMM(__printk_cpu_sync_try_get:B)
4362 		 */
4363 		return 1;
4364 
4365 	} else if (old == cpu) {
4366 		/* This CPU is already the owner. */
4367 		atomic_inc(&printk_cpu_sync_nested);
4368 		return 1;
4369 	}
4370 
4371 	return 0;
4372 }
4373 EXPORT_SYMBOL(__printk_cpu_sync_try_get);
4374 
4375 /**
4376  * __printk_cpu_sync_put() - Release the printk cpu-reentrant spinning lock.
4377  *
4378  * The calling processor must be the owner of the lock.
4379  *
4380  * Context: Any context. Expects interrupts to be disabled.
4381  */
4382 void __printk_cpu_sync_put(void)
4383 {
4384 	if (atomic_read(&printk_cpu_sync_nested)) {
4385 		atomic_dec(&printk_cpu_sync_nested);
4386 		return;
4387 	}
4388 
4389 	/*
4390 	 * This CPU is finished loading/storing data:
4391 	 * LMM(__printk_cpu_sync_put:A)
4392 	 */
4393 
4394 	/*
4395 	 * Guarantee loads and stores from this CPU when it was the
4396 	 * lock owner are visible to the next lock owner. This pairs
4397 	 * with __printk_cpu_sync_try_get:A.
4398 	 *
4399 	 * Memory barrier involvement:
4400 	 *
4401 	 * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B,
4402 	 * then __printk_cpu_sync_try_get:B reads from __printk_cpu_sync_put:A.
4403 	 *
4404 	 * Relies on:
4405 	 *
4406 	 * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B
4407 	 * of this CPU
4408 	 *    matching
4409 	 * ACQUIRE from __printk_cpu_sync_try_get:A to
4410 	 * __printk_cpu_sync_try_get:B of the next CPU
4411 	 */
4412 	atomic_set_release(&printk_cpu_sync_owner,
4413 			   -1); /* LMM(__printk_cpu_sync_put:B) */
4414 }
4415 EXPORT_SYMBOL(__printk_cpu_sync_put);
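
/*
 * These low-level helpers are normally used via the printk.h wrappers,
 * which disable interrupts and busy-wait for ownership. Illustrative
 * sketch for keeping a multi-line report from interleaving with output
 * from other CPUs (dump_stack() serializes its output this way):
 *
 *	unsigned long flags;
 *
 *	printk_cpu_sync_get_irqsave(flags);
 *	... emit the related lines back to back ...
 *	printk_cpu_sync_put_irqrestore(flags);
 *
 * The lock is CPU-reentrant, so a nested user on the same CPU (e.g. from
 * an NMI) succeeds immediately, but it must not be used to protect data.
 */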
4416 #endif /* CONFIG_SMP */
4417