// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/kernel/printk.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 * Modified to make sys_syslog() more flexible: added commands to
 * return the last 4k of kernel messages, regardless of whether
 * they've been read or not.  Added option to suppress kernel printk's
 * to the console.  Added hook for sending the console messages
 * elsewhere, in preparation for a serial line console (someday).
 * Ted Ts'o, 2/11/93.
 * Modified for sysctl support, 1/8/97, Chris Horn.
 * Fixed SMP synchronization, 08/08/99, Manfred Spraul
 *     manfred@colorfullife.com
 * Rewrote bits to get rid of console_lock
 *	01Mar01 Andrew Morton
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>
#include <linux/console.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/security.h>
#include <linux/memblock.h>
#include <linux/syscalls.h>
#include <linux/crash_core.h>
#include <linux/ratelimit.h>
#include <linux/kmsg_dump.h>
#include <linux/syslog.h>
#include <linux/cpu.h>
#include <linux/rculist.h>
#include <linux/poll.h>
#include <linux/irq_work.h>
#include <linux/ctype.h>
#include <linux/uio.h>
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>

#include <linux/uaccess.h>
#include <asm/sections.h>

#include <trace/events/initcall.h>
#define CREATE_TRACE_POINTS
#include <trace/events/printk.h>

#include "printk_ringbuffer.h"
#include "console_cmdline.h"
#include "braille.h"
#include "internal.h"

int console_printk[4] = {
	CONSOLE_LOGLEVEL_DEFAULT,	/* console_loglevel */
	MESSAGE_LOGLEVEL_DEFAULT,	/* default_message_loglevel */
	CONSOLE_LOGLEVEL_MIN,		/* minimum_console_loglevel */
	CONSOLE_LOGLEVEL_DEFAULT,	/* default_console_loglevel */
};
EXPORT_SYMBOL_GPL(console_printk);

atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0);
EXPORT_SYMBOL(ignore_console_lock_warning);

/*
 * Low level drivers may need that to know if they can schedule in
 * their unblank() callback or not. So let's export it.
 */
int oops_in_progress;
EXPORT_SYMBOL(oops_in_progress);

/*
 * console_mutex protects console_list updates and console->flags updates.
 * The flags are synchronized only for consoles that are registered, i.e.
 * accessible via the console list.
 */
static DEFINE_MUTEX(console_mutex);

/*
 * console_sem protects updates to console->seq and console_suspended,
 * and also provides serialization for console printing.
 */
static DEFINE_SEMAPHORE(console_sem);
HLIST_HEAD(console_list);
EXPORT_SYMBOL_GPL(console_list);
DEFINE_STATIC_SRCU(console_srcu);

/*
 * System may need to suppress printk message under certain
 * circumstances, like after kernel panic happens.
 */
int __read_mostly suppress_printk;

/*
 * During panic, heavy printk by other CPUs can delay the
 * panic and risk deadlock on console resources.
 */
static int __read_mostly suppress_panic_printk;

#ifdef CONFIG_LOCKDEP
static struct lockdep_map console_lock_dep_map = {
	.name = "console_lock"
};

void lockdep_assert_console_list_lock_held(void)
{
	lockdep_assert_held(&console_mutex);
}
EXPORT_SYMBOL(lockdep_assert_console_list_lock_held);
#endif

#ifdef CONFIG_DEBUG_LOCK_ALLOC
bool console_srcu_read_lock_is_held(void)
{
	return srcu_read_lock_held(&console_srcu);
}
EXPORT_SYMBOL(console_srcu_read_lock_is_held);
#endif

enum devkmsg_log_bits {
	__DEVKMSG_LOG_BIT_ON = 0,
	__DEVKMSG_LOG_BIT_OFF,
	__DEVKMSG_LOG_BIT_LOCK,
};

enum devkmsg_log_masks {
	DEVKMSG_LOG_MASK_ON	= BIT(__DEVKMSG_LOG_BIT_ON),
	DEVKMSG_LOG_MASK_OFF	= BIT(__DEVKMSG_LOG_BIT_OFF),
	DEVKMSG_LOG_MASK_LOCK	= BIT(__DEVKMSG_LOG_BIT_LOCK),
};

/* Keep both the 'on' and 'off' bits clear, i.e. ratelimit by default: */
#define DEVKMSG_LOG_MASK_DEFAULT	0

static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;

static int __control_devkmsg(char *str)
{
	size_t len;

	if (!str)
		return -EINVAL;

	len = str_has_prefix(str, "on");
	if (len) {
		devkmsg_log = DEVKMSG_LOG_MASK_ON;
		return len;
	}

	len = str_has_prefix(str, "off");
	if (len) {
		devkmsg_log = DEVKMSG_LOG_MASK_OFF;
		return len;
	}

	len = str_has_prefix(str, "ratelimit");
	if (len) {
		devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;
		return len;
	}

	return -EINVAL;
}

static int __init control_devkmsg(char *str)
{
	if (__control_devkmsg(str) < 0) {
		pr_warn("printk.devkmsg: bad option string '%s'\n", str);
		return 1;
	}

	/*
	 * Set sysctl string accordingly:
	 */
	if (devkmsg_log == DEVKMSG_LOG_MASK_ON)
		strcpy(devkmsg_log_str, "on");
	else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF)
		strcpy(devkmsg_log_str, "off");
	/* else "ratelimit" which is set by default. */

	/*
	 * Sysctl cannot change it anymore. The kernel command line setting of
	 * this parameter is to force the setting to be permanent throughout the
	 * runtime of the system. This is a precautionary measure against
	 * userspace trying to be a smarta** and attempting to change it up on us.
	 */
	devkmsg_log |= DEVKMSG_LOG_MASK_LOCK;

	return 1;
}
__setup("printk.devkmsg=", control_devkmsg);

char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit";
#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	char old_str[DEVKMSG_STR_MAX_SIZE];
	unsigned int old;
	int err;

	if (write) {
		if (devkmsg_log & DEVKMSG_LOG_MASK_LOCK)
			return -EINVAL;

		old = devkmsg_log;
		strncpy(old_str, devkmsg_log_str, DEVKMSG_STR_MAX_SIZE);
	}

	err = proc_dostring(table, write, buffer, lenp, ppos);
	if (err)
		return err;

	if (write) {
		err = __control_devkmsg(devkmsg_log_str);

		/*
		 * Do not accept an unknown string OR a known string with
		 * trailing crap...
		 */
		if (err < 0 || (err + 1 != *lenp)) {

			/* ... and restore old setting. */
			devkmsg_log = old;
			strncpy(devkmsg_log_str, old_str, DEVKMSG_STR_MAX_SIZE);

			return -EINVAL;
		}
	}

	return 0;
}
#endif /* CONFIG_PRINTK && CONFIG_SYSCTL */

/**
 * console_list_lock - Lock the console list
 *
 * For console list or console->flags updates
 */
void console_list_lock(void)
{
	/*
	 * In unregister_console() and console_force_preferred_locked(),
	 * synchronize_srcu() is called with the console_list_lock held.
	 * Therefore it is not allowed that the console_list_lock is taken
	 * with the srcu_lock held.
	 *
	 * Detecting if this context is really in the read-side critical
	 * section is only possible if the appropriate debug options are
	 * enabled.
	 */
	WARN_ON_ONCE(debug_lockdep_rcu_enabled() &&
		     srcu_read_lock_held(&console_srcu));

	mutex_lock(&console_mutex);
}
EXPORT_SYMBOL(console_list_lock);

/**
 * console_list_unlock - Unlock the console list
 *
 * Counterpart to console_list_lock()
 */
void console_list_unlock(void)
{
	mutex_unlock(&console_mutex);
}
EXPORT_SYMBOL(console_list_unlock);

/**
 * console_srcu_read_lock - Register a new reader for the
 *	SRCU-protected console list
 *
 * Use for_each_console_srcu() to iterate the console list
 *
 * Context: Any context.
 * Return: A cookie to pass to console_srcu_read_unlock().
 */
int console_srcu_read_lock(void)
{
	return srcu_read_lock_nmisafe(&console_srcu);
}
EXPORT_SYMBOL(console_srcu_read_lock);

/**
 * console_srcu_read_unlock - Unregister an old reader from
 *	the SRCU-protected console list
 * @cookie: cookie returned from console_srcu_read_lock()
 *
 * Counterpart to console_srcu_read_lock()
 */
void console_srcu_read_unlock(int cookie)
{
	srcu_read_unlock_nmisafe(&console_srcu, cookie);
}
EXPORT_SYMBOL(console_srcu_read_unlock);

/*
 * Helper macros to handle lockdep when locking/unlocking console_sem. We use
 * macros instead of functions so that _RET_IP_ contains useful information.
 */
#define down_console_sem() do { \
	down(&console_sem);\
	mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\
} while (0)

static int __down_trylock_console_sem(unsigned long ip)
{
	int lock_failed;
	unsigned long flags;

	/*
	 * Here and in __up_console_sem() we need to be in safe mode,
	 * because spindump/WARN/etc from under console ->lock will
	 * deadlock in printk()->down_trylock_console_sem() otherwise.
	 */
	printk_safe_enter_irqsave(flags);
	lock_failed = down_trylock(&console_sem);
	printk_safe_exit_irqrestore(flags);

	if (lock_failed)
		return 1;
	mutex_acquire(&console_lock_dep_map, 0, 1, ip);
	return 0;
}
#define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_)

static void __up_console_sem(unsigned long ip)
{
	unsigned long flags;

	mutex_release(&console_lock_dep_map, ip);

	printk_safe_enter_irqsave(flags);
	up(&console_sem);
	printk_safe_exit_irqrestore(flags);
}
#define up_console_sem() __up_console_sem(_RET_IP_)

static bool panic_in_progress(void)
{
	return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
}

/*
 * This is used for debugging the mess that is the VT code by
 * keeping track if we have the console semaphore held. It's
 * definitely not the perfect debug tool (we don't know if _WE_
 * hold it and are racing, but it helps tracking those weird code
 * paths in the console code where we end up in places I want
 * locked without the console semaphore held).
 */
static int console_locked, console_suspended;

/*
 *	Array of consoles built from command line options (console=)
 */

#define MAX_CMDLINECONSOLES 8

static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];

static int preferred_console = -1;
int console_set_on_cmdline;
EXPORT_SYMBOL(console_set_on_cmdline);

/* Flag: console code may call schedule() */
static int console_may_schedule;

enum con_msg_format_flags {
	MSG_FORMAT_DEFAULT	= 0,
	MSG_FORMAT_SYSLOG	= (1 << 0),
};

static int console_msg_format = MSG_FORMAT_DEFAULT;

/*
 * The printk log buffer consists of a sequenced collection of records, each
 * containing variable length message text. Every record also contains its
 * own meta-data (@info).
 *
 * Every record meta-data carries the timestamp in microseconds, as well as
 * the standard userspace syslog level and syslog facility. The usual kernel
 * messages use LOG_KERN; userspace-injected messages always carry a matching
 * syslog facility, by default LOG_USER. The origin of every message can be
 * reliably determined that way.
 *
 * The human readable log message of a record is available in @text, the
 * length of the message text in @text_len. The stored message is not
 * terminated.
 *
 * Optionally, a record can carry a dictionary of properties (key/value
 * pairs), to provide userspace with a machine-readable message context.
 *
 * Examples for well-defined, commonly used property names are:
 *   DEVICE=b12:8               device identifier
 *                                b12:8         block dev_t
 *                                c127:3        char dev_t
 *                                n8            netdev ifindex
 *                                +sound:card0  subsystem:devname
 *   SUBSYSTEM=pci              driver-core subsystem name
 *
 * Valid characters in property names are [a-zA-Z0-9.-_]. Property names
 * and values are terminated by a '\0' character.
 *
 * Example of record values:
 *   record.text_buf                = "it's a line" (unterminated)
 *   record.info.seq                = 56
 *   record.info.ts_nsec            = 36863
 *   record.info.text_len           = 11
 *   record.info.facility           = 0 (LOG_KERN)
 *   record.info.flags              = 0
 *   record.info.level              = 3 (LOG_ERR)
 *   record.info.caller_id          = 299 (task 299)
 *   record.info.dev_info.subsystem = "pci" (terminated)
 *   record.info.dev_info.device    = "+pci:0000:00:01.0" (terminated)
 *
 * The 'struct printk_info' buffer must never be directly exported to
 * userspace, it is a kernel-private implementation detail that might
 * need to be changed in the future, when the requirements change.
 *
 * /dev/kmsg exports the structured data in the following line format:
 *   "<level>,<sequnum>,<timestamp>,<contflag>[,additional_values, ... ];<message text>\n"
 *
 * Users of the export format should ignore possible additional values
 * separated by ',', and find the message after the ';' character.
 *
 * The optional key/value pairs are attached as continuation lines starting
 * with a space character and terminated by a newline. All possible
 * non-printable characters are escaped in the "\xff" notation.
 */

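/*
 * Illustrative example (not part of the original source; values made up):
 * a kernel record with level 6 and sequence number 339 would show up in
 * /dev/kmsg roughly as
 *
 *   6,339,5140900,-;NET: Registered PF_VSOCK protocol family
 *    SUBSYSTEM=vsock
 *
 * i.e. "<level>,<seqnum>,<timestamp usec>,<contflag>;<text>", optionally
 * followed by dictionary continuation lines that start with a space.
 */
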
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
static DEFINE_MUTEX(syslog_lock);

#ifdef CONFIG_PRINTK
DECLARE_WAIT_QUEUE_HEAD(log_wait);
/* All 3 protected by @syslog_lock. */
/* the next printk record to read by syslog(READ) or /proc/kmsg */
static u64 syslog_seq;
static size_t syslog_partial;
static bool syslog_time;

struct latched_seq {
	seqcount_latch_t	latch;
	u64			val[2];
};

/*
 * The next printk record to read after the last 'clear' command. There are
 * two copies (updated with seqcount_latch) so that reads can locklessly
 * access a valid value. Writers are synchronized by @syslog_lock.
 */
static struct latched_seq clear_seq = {
	.latch		= SEQCNT_LATCH_ZERO(clear_seq.latch),
	.val[0]		= 0,
	.val[1]		= 0,
};

#define LOG_LEVEL(v)		((v) & 0x07)
#define LOG_FACILITY(v)		((v) >> 3 & 0xff)

/* record buffer */
#define LOG_ALIGN __alignof__(unsigned long)
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
#define LOG_BUF_LEN_MAX (u32)(1 << 31)
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;

/*
 * Define the average message size. This only affects the number of
 * descriptors that will be available. Underestimating is better than
 * overestimating (too many available descriptors is better than not enough).
 */
#define PRB_AVGBITS 5	/* 32 character average length */

#if CONFIG_LOG_BUF_SHIFT <= PRB_AVGBITS
#error CONFIG_LOG_BUF_SHIFT value too small.
#endif
_DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS,
		 PRB_AVGBITS, &__log_buf[0]);

static struct printk_ringbuffer printk_rb_dynamic;

static struct printk_ringbuffer *prb = &printk_rb_static;

/*
 * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before
 * per_cpu_areas are initialised. This variable is set to true when
 * it's safe to access per-CPU data.
 */
static bool __printk_percpu_data_ready __ro_after_init;

bool printk_percpu_data_ready(void)
{
	return __printk_percpu_data_ready;
}

/* Must be called under syslog_lock. */
static void latched_seq_write(struct latched_seq *ls, u64 val)
{
	raw_write_seqcount_latch(&ls->latch);
	ls->val[0] = val;
	raw_write_seqcount_latch(&ls->latch);
	ls->val[1] = val;
}

/* Can be called from any context. */
static u64 latched_seq_read_nolock(struct latched_seq *ls)
{
	unsigned int seq;
	unsigned int idx;
	u64 val;

	do {
		seq = raw_read_seqcount_latch(&ls->latch);
		idx = seq & 0x1;
		val = ls->val[idx];
	} while (read_seqcount_latch_retry(&ls->latch, seq));

	return val;
}

/* Return log buffer address */
char *log_buf_addr_get(void)
{
	return log_buf;
}

/* Return log buffer size */
u32 log_buf_len_get(void)
{
	return log_buf_len;
}

/*
 * Define how much of the log buffer we could take at maximum. The value
 * must be greater than two. Note that only half of the buffer is available
 * when the index points to the middle.
 */
#define MAX_LOG_TAKE_PART 4
static const char trunc_msg[] = "<truncated>";

static void truncate_msg(u16 *text_len, u16 *trunc_msg_len)
{
	/*
	 * The message should not take the whole buffer. Otherwise, it might
	 * get removed too soon.
	 */
	u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART;

	if (*text_len > max_text_len)
		*text_len = max_text_len;

	/* enable the warning message (if there is room) */
	*trunc_msg_len = strlen(trunc_msg);
	if (*text_len >= *trunc_msg_len)
		*text_len -= *trunc_msg_len;
	else
		*trunc_msg_len = 0;
}

int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT);

static int syslog_action_restricted(int type)
{
	if (dmesg_restrict)
		return 1;
	/*
	 * Unless restricted, we allow "read all" and "get buffer size"
	 * for everybody.
	 */
	return type != SYSLOG_ACTION_READ_ALL &&
	       type != SYSLOG_ACTION_SIZE_BUFFER;
}

static int check_syslog_permissions(int type, int source)
{
	/*
	 * If this is from /proc/kmsg and we've already opened it, then we've
	 * already done the capabilities checks at open time.
	 */
	if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN)
		goto ok;

	if (syslog_action_restricted(type)) {
		if (capable(CAP_SYSLOG))
			goto ok;
		/*
		 * For historical reasons, accept CAP_SYS_ADMIN too, with
		 * a warning.
		 */
		if (capable(CAP_SYS_ADMIN)) {
			pr_warn_once("%s (%d): Attempt to access syslog with "
				     "CAP_SYS_ADMIN but no CAP_SYSLOG "
				     "(deprecated).\n",
				     current->comm, task_pid_nr(current));
			goto ok;
		}
		return -EPERM;
	}
ok:
	return security_syslog(type);
}

static void append_char(char **pp, char *e, char c)
{
	if (*pp < e)
		*(*pp)++ = c;
}

static ssize_t info_print_ext_header(char *buf, size_t size,
				     struct printk_info *info)
{
	u64 ts_usec = info->ts_nsec;
	char caller[20];
#ifdef CONFIG_PRINTK_CALLER
	u32 id = info->caller_id;

	snprintf(caller, sizeof(caller), ",caller=%c%u",
		 id & 0x80000000 ? 'C' : 'T', id & ~0x80000000);
#else
	caller[0] = '\0';
#endif

	do_div(ts_usec, 1000);

	return scnprintf(buf, size, "%u,%llu,%llu,%c%s;",
			 (info->facility << 3) | info->level, info->seq,
			 ts_usec, info->flags & LOG_CONT ?
			 'c' : '-', caller);
}

static ssize_t msg_add_ext_text(char *buf, size_t size,
				const char *text, size_t text_len,
				unsigned char endc)
{
	char *p = buf, *e = buf + size;
	size_t i;

	/* escape non-printable characters */
	for (i = 0; i < text_len; i++) {
		unsigned char c = text[i];

		if (c < ' ' || c >= 127 || c == '\\')
			p += scnprintf(p, e - p, "\\x%02x", c);
		else
			append_char(&p, e, c);
	}
	append_char(&p, e, endc);

	return p - buf;
}

static ssize_t msg_add_dict_text(char *buf, size_t size,
				 const char *key, const char *val)
{
	size_t val_len = strlen(val);
	ssize_t len;

	if (!val_len)
		return 0;

	len = msg_add_ext_text(buf, size, "", 0, ' ');	/* dict prefix */
	len += msg_add_ext_text(buf + len, size - len, key, strlen(key), '=');
	len += msg_add_ext_text(buf + len, size - len, val, val_len, '\n');

	return len;
}

static ssize_t msg_print_ext_body(char *buf, size_t size,
				  char *text, size_t text_len,
				  struct dev_printk_info *dev_info)
{
	ssize_t len;

	len = msg_add_ext_text(buf, size, text, text_len, '\n');

	if (!dev_info)
		goto out;

	len += msg_add_dict_text(buf + len, size - len, "SUBSYSTEM",
				 dev_info->subsystem);
	len += msg_add_dict_text(buf + len, size - len, "DEVICE",
				 dev_info->device);
out:
	return len;
}

static bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
				    bool is_extended, bool may_supress);

/* /dev/kmsg - userspace message inject/listen interface */
struct devkmsg_user {
	atomic64_t seq;
	struct ratelimit_state rs;
	struct mutex lock;
	struct printk_buffers pbufs;
};

static __printf(3, 4) __cold
int devkmsg_emit(int facility, int level, const char *fmt, ...)
{
	va_list args;
	int r;

	va_start(args, fmt);
	r = vprintk_emit(facility, level, NULL, fmt, args);
	va_end(args);

	return r;
}

static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
{
	char *buf, *line;
	int level = default_message_loglevel;
	int facility = 1;	/* LOG_USER */
	struct file *file = iocb->ki_filp;
	struct devkmsg_user *user = file->private_data;
	size_t len = iov_iter_count(from);
	ssize_t ret = len;

	if (len > PRINTKRB_RECORD_MAX)
		return -EINVAL;

	/* Ignore when user logging is disabled. */
	if (devkmsg_log & DEVKMSG_LOG_MASK_OFF)
		return len;

	/* Ratelimit when not explicitly enabled. */
	if (!(devkmsg_log & DEVKMSG_LOG_MASK_ON)) {
		if (!___ratelimit(&user->rs, current->comm))
			return ret;
	}

	buf = kmalloc(len+1, GFP_KERNEL);
	if (buf == NULL)
		return -ENOMEM;

	buf[len] = '\0';
	if (!copy_from_iter_full(buf, len, from)) {
		kfree(buf);
		return -EFAULT;
	}

	/*
	 * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace
	 * the decimal value represents 32bit, the lower 3 bit are the log
	 * level, the rest are the log facility.
	 *
	 * If no prefix or no userspace facility is specified, we
	 * enforce LOG_USER, to be able to reliably distinguish
	 * kernel-generated messages from userspace-injected ones.
	 */
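	/*
	 * Illustrative example (values made up): a write of "<30>foo" parses
	 * as u = 30, so level = 30 & 7 = 6 and facility = 30 >> 3 = 3; a
	 * write of "<6>bar" keeps facility 1 (LOG_USER) because the encoded
	 * facility is 0; a plain "baz" keeps both defaults.
	 */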
	line = buf;
	if (line[0] == '<') {
		char *endp = NULL;
		unsigned int u;

		u = simple_strtoul(line + 1, &endp, 10);
		if (endp && endp[0] == '>') {
			level = LOG_LEVEL(u);
			if (LOG_FACILITY(u) != 0)
				facility = LOG_FACILITY(u);
			endp++;
			line = endp;
		}
	}

	devkmsg_emit(facility, level, "%s", line);
	kfree(buf);
	return ret;
}

static ssize_t devkmsg_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	struct devkmsg_user *user = file->private_data;
	char *outbuf = &user->pbufs.outbuf[0];
	struct printk_message pmsg = {
		.pbufs = &user->pbufs,
	};
	ssize_t ret;

	ret = mutex_lock_interruptible(&user->lock);
	if (ret)
		return ret;

	if (!printk_get_next_message(&pmsg, atomic64_read(&user->seq), true, false)) {
		if (file->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
			goto out;
		}

		/*
		 * Guarantee this task is visible on the waitqueue before
		 * checking the wake condition.
		 *
		 * The full memory barrier within set_current_state() of
		 * prepare_to_wait_event() pairs with the full memory barrier
		 * within wq_has_sleeper().
		 *
		 * This pairs with __wake_up_klogd:A.
		 */
		ret = wait_event_interruptible(log_wait,
				printk_get_next_message(&pmsg, atomic64_read(&user->seq), true,
							false)); /* LMM(devkmsg_read:A) */
		if (ret)
			goto out;
	}

	if (pmsg.dropped) {
		/* our last seen message is gone, return error and reset */
		atomic64_set(&user->seq, pmsg.seq);
		ret = -EPIPE;
		goto out;
	}

	atomic64_set(&user->seq, pmsg.seq + 1);

	if (pmsg.outbuf_len > count) {
		ret = -EINVAL;
		goto out;
	}

	if (copy_to_user(buf, outbuf, pmsg.outbuf_len)) {
		ret = -EFAULT;
		goto out;
	}
	ret = pmsg.outbuf_len;
out:
	mutex_unlock(&user->lock);
	return ret;
}

/*
 * Be careful when modifying this function!!!
 *
 * Only a few operations are supported because the device works only with the
 * entire variable length messages (records). Non-standard values are
 * returned in the other cases and it has been this way for quite some time.
 * User space applications might depend on this behavior.
 */
static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
{
	struct devkmsg_user *user = file->private_data;
	loff_t ret = 0;

	if (offset)
		return -ESPIPE;

	switch (whence) {
	case SEEK_SET:
		/* the first record */
		atomic64_set(&user->seq, prb_first_valid_seq(prb));
		break;
	case SEEK_DATA:
		/*
		 * The first record after the last SYSLOG_ACTION_CLEAR,
		 * like issued by 'dmesg -c'. Reading /dev/kmsg itself
		 * changes no global state, and does not clear anything.
		 */
		atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq));
		break;
	case SEEK_END:
		/* after the last record */
		atomic64_set(&user->seq, prb_next_seq(prb));
		break;
	default:
		ret = -EINVAL;
	}
	return ret;
}

static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
{
	struct devkmsg_user *user = file->private_data;
	struct printk_info info;
	__poll_t ret = 0;

	poll_wait(file, &log_wait, wait);

	if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) {
		/* return error when data has vanished underneath us */
		if (info.seq != atomic64_read(&user->seq))
			ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
		else
			ret = EPOLLIN|EPOLLRDNORM;
	}

	return ret;
}

static int devkmsg_open(struct inode *inode, struct file *file)
{
	struct devkmsg_user *user;
	int err;

	if (devkmsg_log & DEVKMSG_LOG_MASK_OFF)
		return -EPERM;

	/* write-only does not need any file context */
	if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
		err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
					       SYSLOG_FROM_READER);
		if (err)
			return err;
	}

	user = kvmalloc(sizeof(struct devkmsg_user), GFP_KERNEL);
	if (!user)
		return -ENOMEM;

	ratelimit_default_init(&user->rs);
	ratelimit_set_flags(&user->rs, RATELIMIT_MSG_ON_RELEASE);

	mutex_init(&user->lock);

	atomic64_set(&user->seq, prb_first_valid_seq(prb));

	file->private_data = user;
	return 0;
}

static int devkmsg_release(struct inode *inode, struct file *file)
{
	struct devkmsg_user *user = file->private_data;

	ratelimit_state_exit(&user->rs);

	mutex_destroy(&user->lock);
	kvfree(user);
	return 0;
}

const struct file_operations kmsg_fops = {
	.open = devkmsg_open,
	.read = devkmsg_read,
	.write_iter = devkmsg_write,
	.llseek = devkmsg_llseek,
	.poll = devkmsg_poll,
	.release = devkmsg_release,
};

#ifdef CONFIG_CRASH_CORE
/*
 * This appends the listed symbols to /proc/vmcore
 *
 * /proc/vmcore is used by various utilities, like crash and makedumpfile to
 * obtain access to symbols that are otherwise very difficult to locate. These
 * symbols are specifically used so that utilities can access and extract the
 * dmesg log from a vmcore file after a crash.
 */
void log_buf_vmcoreinfo_setup(void)
{
	struct dev_printk_info *dev_info = NULL;

	VMCOREINFO_SYMBOL(prb);
	VMCOREINFO_SYMBOL(printk_rb_static);
	VMCOREINFO_SYMBOL(clear_seq);

	/*
	 * Export struct size and field offsets. User space tools can
	 * parse it and detect any changes to structure down the line.
	 */

	VMCOREINFO_STRUCT_SIZE(printk_ringbuffer);
	VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring);
	VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring);
	VMCOREINFO_OFFSET(printk_ringbuffer, fail);

	VMCOREINFO_STRUCT_SIZE(prb_desc_ring);
	VMCOREINFO_OFFSET(prb_desc_ring, count_bits);
	VMCOREINFO_OFFSET(prb_desc_ring, descs);
	VMCOREINFO_OFFSET(prb_desc_ring, infos);
	VMCOREINFO_OFFSET(prb_desc_ring, head_id);
	VMCOREINFO_OFFSET(prb_desc_ring, tail_id);

	VMCOREINFO_STRUCT_SIZE(prb_desc);
	VMCOREINFO_OFFSET(prb_desc, state_var);
	VMCOREINFO_OFFSET(prb_desc, text_blk_lpos);

	VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos);
	VMCOREINFO_OFFSET(prb_data_blk_lpos, begin);
	VMCOREINFO_OFFSET(prb_data_blk_lpos, next);

	VMCOREINFO_STRUCT_SIZE(printk_info);
	VMCOREINFO_OFFSET(printk_info, seq);
	VMCOREINFO_OFFSET(printk_info, ts_nsec);
	VMCOREINFO_OFFSET(printk_info, text_len);
	VMCOREINFO_OFFSET(printk_info, caller_id);
	VMCOREINFO_OFFSET(printk_info, dev_info);

	VMCOREINFO_STRUCT_SIZE(dev_printk_info);
	VMCOREINFO_OFFSET(dev_printk_info, subsystem);
	VMCOREINFO_LENGTH(printk_info_subsystem, sizeof(dev_info->subsystem));
	VMCOREINFO_OFFSET(dev_printk_info, device);
	VMCOREINFO_LENGTH(printk_info_device, sizeof(dev_info->device));

	VMCOREINFO_STRUCT_SIZE(prb_data_ring);
	VMCOREINFO_OFFSET(prb_data_ring, size_bits);
	VMCOREINFO_OFFSET(prb_data_ring, data);
	VMCOREINFO_OFFSET(prb_data_ring, head_lpos);
	VMCOREINFO_OFFSET(prb_data_ring, tail_lpos);

	VMCOREINFO_SIZE(atomic_long_t);
	VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter);

	VMCOREINFO_STRUCT_SIZE(latched_seq);
	VMCOREINFO_OFFSET(latched_seq, val);
}
#endif

/* requested log_buf_len from kernel cmdline */
static unsigned long __initdata new_log_buf_len;

/* we practice scaling the ring buffer by powers of 2 */
static void __init log_buf_len_update(u64 size)
{
	if (size > (u64)LOG_BUF_LEN_MAX) {
		size = (u64)LOG_BUF_LEN_MAX;
		pr_err("log_buf over 2G is not supported.\n");
	}

	if (size)
		size = roundup_pow_of_two(size);
	if (size > log_buf_len)
		new_log_buf_len = (unsigned long)size;
}

/* save requested log_buf_len since it's too early to process it */
static int __init log_buf_len_setup(char *str)
{
	u64 size;

	if (!str)
		return -EINVAL;

	size = memparse(str, &str);

	log_buf_len_update(size);

	return 0;
}
early_param("log_buf_len", log_buf_len_setup);
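
/*
 * Illustrative example (not part of the original source): booting with
 * "log_buf_len=1M" passes 1048576 through memparse(); booting with
 * "log_buf_len=300K" passes 307200, which roundup_pow_of_two() above
 * turns into 524288 (512 KiB).
 */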

#ifdef CONFIG_SMP
#define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT)

static void __init log_buf_add_cpu(void)
{
	unsigned int cpu_extra;

	/*
	 * archs should set up cpu_possible_bits properly with
	 * set_cpu_possible() after setup_arch() but just in
	 * case let's ensure this is valid.
	 */
	if (num_possible_cpus() == 1)
		return;

	cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN;

	/* by default this will only continue through for large > 64 CPUs */
	if (cpu_extra <= __LOG_BUF_LEN / 2)
		return;

	pr_info("log_buf_len individual max cpu contribution: %d bytes\n",
		__LOG_CPU_MAX_BUF_LEN);
	pr_info("log_buf_len total cpu_extra contributions: %d bytes\n",
		cpu_extra);
	pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN);

	log_buf_len_update(cpu_extra + __LOG_BUF_LEN);
}
#else /* !CONFIG_SMP */
static inline void log_buf_add_cpu(void) {}
#endif /* CONFIG_SMP */

static void __init set_percpu_data_ready(void)
{
	__printk_percpu_data_ready = true;
}

static unsigned int __init add_to_rb(struct printk_ringbuffer *rb,
				     struct printk_record *r)
{
	struct prb_reserved_entry e;
	struct printk_record dest_r;

	prb_rec_init_wr(&dest_r, r->info->text_len);

	if (!prb_reserve(&e, rb, &dest_r))
		return 0;

	memcpy(&dest_r.text_buf[0], &r->text_buf[0], r->info->text_len);
	dest_r.info->text_len = r->info->text_len;
	dest_r.info->facility = r->info->facility;
	dest_r.info->level = r->info->level;
	dest_r.info->flags = r->info->flags;
	dest_r.info->ts_nsec = r->info->ts_nsec;
	dest_r.info->caller_id = r->info->caller_id;
	memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info));

	prb_final_commit(&e);

	return prb_record_text_space(&e);
}

static char setup_text_buf[PRINTKRB_RECORD_MAX] __initdata;

void __init setup_log_buf(int early)
{
	struct printk_info *new_infos;
	unsigned int new_descs_count;
	struct prb_desc *new_descs;
	struct printk_info info;
	struct printk_record r;
	unsigned int text_size;
	size_t new_descs_size;
	size_t new_infos_size;
	unsigned long flags;
	char *new_log_buf;
	unsigned int free;
	u64 seq;

	/*
	 * Some archs call setup_log_buf() multiple times - first is very
	 * early, e.g. from setup_arch(), and second - when percpu_areas
	 * are initialised.
	 */
	if (!early)
		set_percpu_data_ready();

	if (log_buf != __log_buf)
		return;

	if (!early && !new_log_buf_len)
		log_buf_add_cpu();

	if (!new_log_buf_len)
		return;

	new_descs_count = new_log_buf_len >> PRB_AVGBITS;
	if (new_descs_count == 0) {
		pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len);
		return;
	}

	new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN);
	if (unlikely(!new_log_buf)) {
		pr_err("log_buf_len: %lu text bytes not available\n",
		       new_log_buf_len);
		return;
	}

	new_descs_size = new_descs_count * sizeof(struct prb_desc);
	new_descs = memblock_alloc(new_descs_size, LOG_ALIGN);
	if (unlikely(!new_descs)) {
		pr_err("log_buf_len: %zu desc bytes not available\n",
		       new_descs_size);
		goto err_free_log_buf;
	}

	new_infos_size = new_descs_count * sizeof(struct printk_info);
	new_infos = memblock_alloc(new_infos_size, LOG_ALIGN);
	if (unlikely(!new_infos)) {
		pr_err("log_buf_len: %zu info bytes not available\n",
		       new_infos_size);
		goto err_free_descs;
	}

	prb_rec_init_rd(&r, &info, &setup_text_buf[0], sizeof(setup_text_buf));

	prb_init(&printk_rb_dynamic,
		 new_log_buf, ilog2(new_log_buf_len),
		 new_descs, ilog2(new_descs_count),
		 new_infos);

	local_irq_save(flags);

	log_buf_len = new_log_buf_len;
	log_buf = new_log_buf;
	new_log_buf_len = 0;

	free = __LOG_BUF_LEN;
	prb_for_each_record(0, &printk_rb_static, seq, &r) {
		text_size = add_to_rb(&printk_rb_dynamic, &r);
		if (text_size > free)
			free = 0;
		else
			free -= text_size;
	}

	prb = &printk_rb_dynamic;

	local_irq_restore(flags);

	/*
	 * Copy any remaining messages that might have appeared from
	 * NMI context after copying but before switching to the
	 * dynamic buffer.
	 */
	prb_for_each_record(seq, &printk_rb_static, seq, &r) {
		text_size = add_to_rb(&printk_rb_dynamic, &r);
		if (text_size > free)
			free = 0;
		else
			free -= text_size;
	}

	if (seq != prb_next_seq(&printk_rb_static)) {
		pr_err("dropped %llu messages\n",
		       prb_next_seq(&printk_rb_static) - seq);
	}

	pr_info("log_buf_len: %u bytes\n", log_buf_len);
	pr_info("early log buf free: %u(%u%%)\n",
		free, (free * 100) / __LOG_BUF_LEN);
	return;

err_free_descs:
	memblock_free(new_descs, new_descs_size);
err_free_log_buf:
	memblock_free(new_log_buf, new_log_buf_len);
}

static bool __read_mostly ignore_loglevel;

static int __init ignore_loglevel_setup(char *str)
{
	ignore_loglevel = true;
	pr_info("debug: ignoring loglevel setting.\n");

	return 0;
}

early_param("ignore_loglevel", ignore_loglevel_setup);
module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(ignore_loglevel,
		 "ignore loglevel setting (prints all kernel messages to the console)");

static bool suppress_message_printing(int level)
{
	return (level >= console_loglevel && !ignore_loglevel);
}

#ifdef CONFIG_BOOT_PRINTK_DELAY

static int boot_delay; /* msecs delay after each printk during bootup */
static unsigned long long loops_per_msec;	/* based on boot_delay */

static int __init boot_delay_setup(char *str)
{
	unsigned long lpj;

	lpj = preset_lpj ? preset_lpj : 1000000;	/* some guess */
	loops_per_msec = (unsigned long long)lpj / 1000 * HZ;

	get_option(&str, &boot_delay);
	if (boot_delay > 10 * 1000)
		boot_delay = 0;

	pr_debug("boot_delay: %u, preset_lpj: %ld, lpj: %lu, "
		 "HZ: %d, loops_per_msec: %llu\n",
		 boot_delay, preset_lpj, lpj, HZ, loops_per_msec);
	return 0;
}
early_param("boot_delay", boot_delay_setup);

static void boot_delay_msec(int level)
{
	unsigned long long k;
	unsigned long timeout;

	if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING)
		|| suppress_message_printing(level)) {
		return;
	}

	k = (unsigned long long)loops_per_msec * boot_delay;

	timeout = jiffies + msecs_to_jiffies(boot_delay);
	while (k) {
		k--;
		cpu_relax();
		/*
		 * use (volatile) jiffies to prevent
		 * compiler reduction; loop termination via jiffies
		 * is secondary and may or may not happen.
		 */
		if (time_after(jiffies, timeout))
			break;
		touch_nmi_watchdog();
	}
}
#else
static inline void boot_delay_msec(int level)
{
}
#endif

static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);

static size_t print_syslog(unsigned int level, char *buf)
{
	return sprintf(buf, "<%u>", level);
}

static size_t print_time(u64 ts, char *buf)
{
	unsigned long rem_nsec = do_div(ts, 1000000000);

	return sprintf(buf, "[%5lu.%06lu]",
		       (unsigned long)ts, rem_nsec / 1000);
}

#ifdef CONFIG_PRINTK_CALLER
static size_t print_caller(u32 id, char *buf)
{
	char caller[12];

	snprintf(caller, sizeof(caller), "%c%u",
		 id & 0x80000000 ? 'C' : 'T', id & ~0x80000000);
	return sprintf(buf, "[%6s]", caller);
}
#else
#define print_caller(id, buf) 0
#endif

static size_t info_print_prefix(const struct printk_info *info, bool syslog,
				bool time, char *buf)
{
	size_t len = 0;

	if (syslog)
		len = print_syslog((info->facility << 3) | info->level, buf);

	if (time)
		len += print_time(info->ts_nsec, buf + len);

	len += print_caller(info->caller_id, buf + len);

	if (IS_ENABLED(CONFIG_PRINTK_CALLER) || time) {
		buf[len++] = ' ';
		buf[len] = '\0';
	}

	return len;
}

/*
 * Prepare the record for printing. The text is shifted within the given
 * buffer to avoid a need for another one. The following operations are
 * done:
 *
 *   - Add prefix for each line.
 *   - Drop truncated lines that no longer fit into the buffer.
 *   - Add the trailing newline that has been removed in vprintk_store().
 *   - Add a string terminator.
 *
 * Since the produced string is always terminated, the maximum possible
 * return value is @r->text_buf_size - 1;
 *
 * Return: The length of the updated/prepared text, including the added
 * prefixes and the newline. The terminator is not counted. The dropped
 * line(s) are not counted.
 */
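/*
 * Illustrative example (values made up): with the prefix "[    1.234567] "
 * and the stored text "first\nsecond", the buffer is rewritten in place to
 * "[    1.234567] first\n[    1.234567] second\n" and that length (without
 * the terminating '\0') is returned.
 */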
static size_t record_print_text(struct printk_record *r, bool syslog,
				bool time)
{
	size_t text_len = r->info->text_len;
	size_t buf_size = r->text_buf_size;
	char *text = r->text_buf;
	char prefix[PRINTK_PREFIX_MAX];
	bool truncated = false;
	size_t prefix_len;
	size_t line_len;
	size_t len = 0;
	char *next;

	/*
	 * If the message was truncated because the buffer was not large
	 * enough, treat the available text as if it were the full text.
	 */
	if (text_len > buf_size)
		text_len = buf_size;

	prefix_len = info_print_prefix(r->info, syslog, time, prefix);

	/*
	 * @text_len: bytes of unprocessed text
	 * @line_len: bytes of current line _without_ newline
	 * @text:     pointer to beginning of current line
	 * @len:      number of bytes prepared in r->text_buf
	 */
	for (;;) {
		next = memchr(text, '\n', text_len);
		if (next) {
			line_len = next - text;
		} else {
			/* Drop truncated line(s). */
			if (truncated)
				break;
			line_len = text_len;
		}

		/*
		 * Truncate the text if there is not enough space to add the
		 * prefix and a trailing newline and a terminator.
		 */
		if (len + prefix_len + text_len + 1 + 1 > buf_size) {
			/* Drop even the current line if no space. */
			if (len + prefix_len + line_len + 1 + 1 > buf_size)
				break;

			text_len = buf_size - len - prefix_len - 1 - 1;
			truncated = true;
		}

		memmove(text + prefix_len, text, text_len);
		memcpy(text, prefix, prefix_len);

		/*
		 * Increment the prepared length to include the text and
		 * prefix that were just moved+copied. Also increment for the
		 * newline at the end of this line. If this is the last line,
		 * there is no newline, but it will be added immediately below.
		 */
		len += prefix_len + line_len + 1;
		if (text_len == line_len) {
			/*
			 * This is the last line. Add the trailing newline
			 * removed in vprintk_store().
			 */
			text[prefix_len + line_len] = '\n';
			break;
		}

		/*
		 * Advance beyond the added prefix and the related line with
		 * its newline.
		 */
		text += prefix_len + line_len + 1;

		/*
		 * The remaining text has only decreased by the line with its
		 * newline.
		 *
		 * Note that @text_len can become zero. It happens when @text
		 * ended with a newline (either due to truncation or the
		 * original string ending with "\n\n"). The loop is correctly
		 * repeated and (if not truncated) an empty line with a prefix
		 * will be prepared.
		 */
		text_len -= line_len + 1;
	}

	/*
	 * If a buffer was provided, it will be terminated. Space for the
	 * string terminator is guaranteed to be available. The terminator is
	 * not counted in the return value.
	 */
	if (buf_size > 0)
		r->text_buf[len] = 0;

	return len;
}

static size_t get_record_print_text_size(struct printk_info *info,
					 unsigned int line_count,
					 bool syslog, bool time)
{
	char prefix[PRINTK_PREFIX_MAX];
	size_t prefix_len;

	prefix_len = info_print_prefix(info, syslog, time, prefix);

	/*
	 * Each line will be preceded with a prefix. The intermediate
	 * newlines are already within the text, but a final trailing
	 * newline will be added.
	 */
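	/*
	 * Illustrative example (numbers made up): a two-line record with
	 * text "abc\ndefgh" has text_len = 9; with a 16-byte prefix this
	 * returns 16 * 2 + 9 + 1 = 42.
	 */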
	return ((prefix_len * line_count) + info->text_len + 1);
}

/*
 * Beginning with @start_seq, find the first record where it and all following
 * records up to (but not including) @max_seq fit into @size.
 *
 * @max_seq is simply an upper bound and does not need to exist. If the caller
 * does not require an upper bound, -1 can be used for @max_seq.
 */
static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
				  bool syslog, bool time)
{
	struct printk_info info;
	unsigned int line_count;
	size_t len = 0;
	u64 seq;

	/* Determine the size of the records up to @max_seq. */
	prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
		if (info.seq >= max_seq)
			break;
		len += get_record_print_text_size(&info, line_count, syslog, time);
	}

	/*
	 * Adjust the upper bound for the next loop to avoid subtracting
	 * lengths that were never added.
	 */
	if (seq < max_seq)
		max_seq = seq;

	/*
	 * Move first record forward until length fits into the buffer. Ignore
	 * newest messages that were not counted in the above cycle. Messages
	 * might appear and get lost in the meantime. This is a best effort
	 * that prevents an infinite loop that could occur with a retry.
	 */
	prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
		if (len <= size || info.seq >= max_seq)
			break;
		len -= get_record_print_text_size(&info, line_count, syslog, time);
	}

	return seq;
}

/* The caller is responsible for making sure @size is greater than 0. */
static int syslog_print(char __user *buf, int size)
{
	struct printk_info info;
	struct printk_record r;
	char *text;
	int len = 0;
	u64 seq;

	text = kmalloc(PRINTK_MESSAGE_MAX, GFP_KERNEL);
	if (!text)
		return -ENOMEM;

	prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX);

	mutex_lock(&syslog_lock);

	/*
	 * Wait for the @syslog_seq record to be available. @syslog_seq may
	 * change while waiting.
	 */
	do {
		seq = syslog_seq;

		mutex_unlock(&syslog_lock);
		/*
		 * Guarantee this task is visible on the waitqueue before
		 * checking the wake condition.
		 *
		 * The full memory barrier within set_current_state() of
		 * prepare_to_wait_event() pairs with the full memory barrier
		 * within wq_has_sleeper().
		 *
		 * This pairs with __wake_up_klogd:A.
		 */
		len = wait_event_interruptible(log_wait,
				prb_read_valid(prb, seq, NULL)); /* LMM(syslog_print:A) */
		mutex_lock(&syslog_lock);

		if (len)
			goto out;
	} while (syslog_seq != seq);

	/*
	 * Copy records that fit into the buffer. The above cycle makes sure
	 * that the first record is always available.
	 */
	do {
		size_t n;
		size_t skip;
		int err;

		if (!prb_read_valid(prb, syslog_seq, &r))
			break;

		if (r.info->seq != syslog_seq) {
			/* message is gone, move to next valid one */
			syslog_seq = r.info->seq;
			syslog_partial = 0;
		}

		/*
		 * To keep reading/counting partial line consistent,
		 * use printk_time value as of the beginning of a line.
		 */
		if (!syslog_partial)
			syslog_time = printk_time;

		skip = syslog_partial;
		n = record_print_text(&r, true, syslog_time);
		if (n - syslog_partial <= size) {
			/* message fits into buffer, move forward */
			syslog_seq = r.info->seq + 1;
			n -= syslog_partial;
			syslog_partial = 0;
		} else if (!len) {
			/* partial read(), remember position */
			n = size;
			syslog_partial += n;
		} else
			n = 0;

		if (!n)
			break;

		mutex_unlock(&syslog_lock);
		err = copy_to_user(buf, text + skip, n);
		mutex_lock(&syslog_lock);

		if (err) {
			if (!len)
				len = -EFAULT;
			break;
		}

		len += n;
		size -= n;
		buf += n;
	} while (size);
out:
	mutex_unlock(&syslog_lock);
	kfree(text);
	return len;
}

static int syslog_print_all(char __user *buf, int size, bool clear)
{
	struct printk_info info;
	struct printk_record r;
	char *text;
	int len = 0;
	u64 seq;
	bool time;

	text = kmalloc(PRINTK_MESSAGE_MAX, GFP_KERNEL);
	if (!text)
		return -ENOMEM;

	time = printk_time;
	/*
	 * Find first record that fits, including all following records,
	 * into the user-provided buffer for this dump.
	 */
	seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1,
				     size, true, time);

	prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX);

	len = 0;
	prb_for_each_record(seq, prb, seq, &r) {
		int textlen;

		textlen = record_print_text(&r, true, time);

		if (len + textlen > size) {
			seq--;
			break;
		}

		if (copy_to_user(buf + len, text, textlen))
			len = -EFAULT;
		else
			len += textlen;

		if (len < 0)
			break;
	}

	if (clear) {
		mutex_lock(&syslog_lock);
		latched_seq_write(&clear_seq, seq);
		mutex_unlock(&syslog_lock);
	}

	kfree(text);
	return len;
}

static void syslog_clear(void)
{
	mutex_lock(&syslog_lock);
	latched_seq_write(&clear_seq, prb_next_seq(prb));
	mutex_unlock(&syslog_lock);
}

int do_syslog(int type, char __user *buf, int len, int source)
{
	struct printk_info info;
	bool clear = false;
	static int saved_console_loglevel = LOGLEVEL_DEFAULT;
	int error;

	error = check_syslog_permissions(type, source);
	if (error)
		return error;

	switch (type) {
	case SYSLOG_ACTION_CLOSE:	/* Close log */
		break;
	case SYSLOG_ACTION_OPEN:	/* Open log */
		break;
	case SYSLOG_ACTION_READ:	/* Read from log */
		if (!buf || len < 0)
			return -EINVAL;
		if (!len)
			return 0;
		if (!access_ok(buf, len))
			return -EFAULT;
		error = syslog_print(buf, len);
		break;
	/* Read/clear last kernel messages */
	case SYSLOG_ACTION_READ_CLEAR:
		clear = true;
		fallthrough;
	/* Read last kernel messages */
	case SYSLOG_ACTION_READ_ALL:
		if (!buf || len < 0)
			return -EINVAL;
		if (!len)
			return 0;
		if (!access_ok(buf, len))
			return -EFAULT;
		error = syslog_print_all(buf, len, clear);
		break;
	/* Clear ring buffer */
	case SYSLOG_ACTION_CLEAR:
		syslog_clear();
		break;
	/* Disable logging to console */
	case SYSLOG_ACTION_CONSOLE_OFF:
		if (saved_console_loglevel == LOGLEVEL_DEFAULT)
			saved_console_loglevel = console_loglevel;
		console_loglevel = minimum_console_loglevel;
		break;
	/* Enable logging to console */
	case SYSLOG_ACTION_CONSOLE_ON:
		if (saved_console_loglevel != LOGLEVEL_DEFAULT) {
			console_loglevel = saved_console_loglevel;
			saved_console_loglevel = LOGLEVEL_DEFAULT;
		}
		break;
	/* Set level of messages printed to console */
	case SYSLOG_ACTION_CONSOLE_LEVEL:
		if (len < 1 || len > 8)
			return -EINVAL;
		if (len < minimum_console_loglevel)
			len = minimum_console_loglevel;
		console_loglevel = len;
		/* Implicitly re-enable logging to console */
		saved_console_loglevel = LOGLEVEL_DEFAULT;
		break;
	/* Number of chars in the log buffer */
	case SYSLOG_ACTION_SIZE_UNREAD:
		mutex_lock(&syslog_lock);
		if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) {
			/* No unread messages. */
			mutex_unlock(&syslog_lock);
			return 0;
		}
		if (info.seq != syslog_seq) {
			/* messages are gone, move to first one */
			syslog_seq = info.seq;
			syslog_partial = 0;
		}
		if (source == SYSLOG_FROM_PROC) {
			/*
			 * Short-cut for poll(/"proc/kmsg") which simply checks
			 * for pending data, not the size; return the count of
			 * records, not the length.
			 */
			error = prb_next_seq(prb) - syslog_seq;
		} else {
			bool time = syslog_partial ? syslog_time : printk_time;
			unsigned int line_count;
			u64 seq;

			prb_for_each_info(syslog_seq, prb, seq, &info,
					  &line_count) {
				error += get_record_print_text_size(&info, line_count,
								    true, time);
				time = printk_time;
			}
			error -= syslog_partial;
		}
		mutex_unlock(&syslog_lock);
		break;
	/* Size of the log buffer */
	case SYSLOG_ACTION_SIZE_BUFFER:
		error = log_buf_len;
		break;
	default:
		error = -EINVAL;
		break;
	}

	return error;
}

SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
{
	return do_syslog(type, buf, len, SYSLOG_FROM_READER);
}

/*
 * Special console_lock variants that help to reduce the risk of soft-lockups.
 * They allow passing console_lock to another printk() call using a busy wait.
 */

#ifdef CONFIG_LOCKDEP
static struct lockdep_map console_owner_dep_map = {
	.name = "console_owner"
};
#endif

static DEFINE_RAW_SPINLOCK(console_owner_lock);
static struct task_struct *console_owner;
static bool console_waiter;

/**
 * console_lock_spinning_enable - mark beginning of code where another
 *	thread might safely busy wait
 *
 * This basically converts console_lock into a spinlock. This marks
 * the section where the console_lock owner cannot sleep, because
 * there may be a waiter spinning (like a spinlock). Also it must be
 * ready to hand over the lock at the end of the section.
 */
static void console_lock_spinning_enable(void)
{
	raw_spin_lock(&console_owner_lock);
	console_owner = current;
	raw_spin_unlock(&console_owner_lock);

	/* The waiter may spin on us after setting console_owner */
	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
}

/**
 * console_lock_spinning_disable_and_check - mark end of code where another
 *	thread was able to busy wait and check if there is a waiter
 * @cookie: cookie returned from console_srcu_read_lock()
 *
 * This is called at the end of the section where spinning is allowed.
 * It has two functions. First, it is a signal that it is no longer
 * safe to start busy waiting for the lock. Second, it checks if
 * there is a busy waiter and passes the lock rights to her.
 *
 * Important: Callers lose both the console_lock and the SRCU read lock if
 * there was a busy waiter. They must not touch items synchronized by
 * console_lock or SRCU read lock in this case.
 *
 * Return: 1 if the lock rights were passed, 0 otherwise.
 */
static int console_lock_spinning_disable_and_check(int cookie)
{
	int waiter;

	raw_spin_lock(&console_owner_lock);
	waiter = READ_ONCE(console_waiter);
	console_owner = NULL;
	raw_spin_unlock(&console_owner_lock);

	if (!waiter) {
		spin_release(&console_owner_dep_map, _THIS_IP_);
		return 0;
	}

	/* The waiter is now free to continue */
	WRITE_ONCE(console_waiter, false);

	spin_release(&console_owner_dep_map, _THIS_IP_);

	/*
	 * Preserve lockdep lock ordering. Release the SRCU read lock before
	 * releasing the console_lock.
	 */
	console_srcu_read_unlock(cookie);

	/*
	 * Hand off console_lock to waiter. The waiter will perform
	 * the up(). After this, the waiter is the console_lock owner.
	 */
	mutex_release(&console_lock_dep_map, _THIS_IP_);
	return 1;
}

/**
 * console_trylock_spinning - try to get console_lock by busy waiting
 *
 * This allows busy waiting for the console_lock when the current
 * owner is running in specially marked sections. It means that
 * the current owner is running and cannot reschedule until it
 * is ready to lose the lock.
 *
 * Return: 1 if we got the lock, 0 otherwise
 */
static int console_trylock_spinning(void)
{
	struct task_struct *owner = NULL;
	bool waiter;
	bool spin = false;
	unsigned long flags;

	if (console_trylock())
		return 1;

	/*
	 * It's unsafe to spin once a panic has begun. If we are the
	 * panic CPU, we may have already halted the owner of the
	 * console_sem. If we are not the panic CPU, then we should
	 * avoid taking console_sem, so the panic CPU has a better
	 * chance of cleanly acquiring it later.
	 */
	if (panic_in_progress())
		return 0;

	printk_safe_enter_irqsave(flags);

	raw_spin_lock(&console_owner_lock);
	owner = READ_ONCE(console_owner);
	waiter = READ_ONCE(console_waiter);
	if (!waiter && owner && owner != current) {
		WRITE_ONCE(console_waiter, true);
		spin = true;
	}
	raw_spin_unlock(&console_owner_lock);

	/*
	 * If there is an active printk() writing to the
	 * consoles, instead of having it write our data too,
	 * see if we can offload that load from the active
	 * printer, and do some printing ourselves.
	 * Go into a spin only if there isn't already a waiter
	 * spinning, and there is an active printer, and
	 * that active printer isn't us (recursive printk?).
	 */
	if (!spin) {
		printk_safe_exit_irqrestore(flags);
		return 0;
	}

	/* We spin waiting for the owner to release us */
	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
	/* Owner will clear console_waiter on hand off */
	while (READ_ONCE(console_waiter))
		cpu_relax();
	spin_release(&console_owner_dep_map, _THIS_IP_);

	printk_safe_exit_irqrestore(flags);
	/*
	 * The owner passed the console lock to us.
1973 * Since we did not spin on console lock, annotate 1974 * this as a trylock. Otherwise lockdep will 1975 * complain. 1976 */ 1977 mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); 1978 1979 return 1; 1980 } 1981 1982 /* 1983 * Recursion is tracked separately on each CPU. If NMIs are supported, an 1984 * additional NMI context per CPU is also separately tracked. Until per-CPU 1985 * is available, a separate "early tracking" is performed. 1986 */ 1987 static DEFINE_PER_CPU(u8, printk_count); 1988 static u8 printk_count_early; 1989 #ifdef CONFIG_HAVE_NMI 1990 static DEFINE_PER_CPU(u8, printk_count_nmi); 1991 static u8 printk_count_nmi_early; 1992 #endif 1993 1994 /* 1995 * Recursion is limited to keep the output sane. printk() should not require 1996 * more than 1 level of recursion (allowing, for example, printk() to trigger 1997 * a WARN), but a higher value is used in case some printk-internal errors 1998 * exist, such as the ringbuffer validation checks failing. 1999 */ 2000 #define PRINTK_MAX_RECURSION 3 2001 2002 /* 2003 * Return a pointer to the dedicated counter for the CPU+context of the 2004 * caller. 2005 */ 2006 static u8 *__printk_recursion_counter(void) 2007 { 2008 #ifdef CONFIG_HAVE_NMI 2009 if (in_nmi()) { 2010 if (printk_percpu_data_ready()) 2011 return this_cpu_ptr(&printk_count_nmi); 2012 return &printk_count_nmi_early; 2013 } 2014 #endif 2015 if (printk_percpu_data_ready()) 2016 return this_cpu_ptr(&printk_count); 2017 return &printk_count_early; 2018 } 2019 2020 /* 2021 * Enter recursion tracking. Interrupts are disabled to simplify tracking. 2022 * The caller must check the boolean return value to see if the recursion is 2023 * allowed. On failure, interrupts are not disabled. 2024 * 2025 * @recursion_ptr must be a variable of type (u8 *) and is the same variable 2026 * that is passed to printk_exit_irqrestore(). 2027 */ 2028 #define printk_enter_irqsave(recursion_ptr, flags) \ 2029 ({ \ 2030 bool success = true; \ 2031 \ 2032 typecheck(u8 *, recursion_ptr); \ 2033 local_irq_save(flags); \ 2034 (recursion_ptr) = __printk_recursion_counter(); \ 2035 if (*(recursion_ptr) > PRINTK_MAX_RECURSION) { \ 2036 local_irq_restore(flags); \ 2037 success = false; \ 2038 } else { \ 2039 (*(recursion_ptr))++; \ 2040 } \ 2041 success; \ 2042 }) 2043 2044 /* Exit recursion tracking, restoring interrupts. */ 2045 #define printk_exit_irqrestore(recursion_ptr, flags) \ 2046 do { \ 2047 typecheck(u8 *, recursion_ptr); \ 2048 (*(recursion_ptr))--; \ 2049 local_irq_restore(flags); \ 2050 } while (0) 2051 2052 int printk_delay_msec __read_mostly; 2053 2054 static inline void printk_delay(int level) 2055 { 2056 boot_delay_msec(level); 2057 2058 if (unlikely(printk_delay_msec)) { 2059 int m = printk_delay_msec; 2060 2061 while (m--) { 2062 mdelay(1); 2063 touch_nmi_watchdog(); 2064 } 2065 } 2066 } 2067 2068 static inline u32 printk_caller_id(void) 2069 { 2070 return in_task() ? task_pid_nr(current) : 2071 0x80000000 + smp_processor_id(); 2072 } 2073 2074 /** 2075 * printk_parse_prefix - Parse level and control flags. 2076 * 2077 * @text: The terminated text message. 2078 * @level: A pointer to the current level value, will be updated. 2079 * @flags: A pointer to the current printk_info flags, will be updated. 2080 * 2081 * @level may be NULL if the caller is not interested in the parsed value. 2082 * Otherwise the variable pointed to by @level must be set to 2083 * LOGLEVEL_DEFAULT in order to be updated with the parsed value. 
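 * For example, a leading KERN_WARNING prefix (ASCII SOH followed by '4') sets a non-NULL @level that still holds LOGLEVEL_DEFAULT to 4 and accounts for the two prefix bytes in the returned length.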
2084 * 2085 * @flags may be NULL if the caller is not interested in the parsed value. 2086 * Otherwise the variable pointed to by @flags will be OR'd with the parsed 2087 * value. 2088 * 2089 * Return: The length of the parsed level and control flags. 2090 */ 2091 u16 printk_parse_prefix(const char *text, int *level, 2092 enum printk_info_flags *flags) 2093 { 2094 u16 prefix_len = 0; 2095 int kern_level; 2096 2097 while (*text) { 2098 kern_level = printk_get_level(text); 2099 if (!kern_level) 2100 break; 2101 2102 switch (kern_level) { 2103 case '0' ... '7': 2104 if (level && *level == LOGLEVEL_DEFAULT) 2105 *level = kern_level - '0'; 2106 break; 2107 case 'c': /* KERN_CONT */ 2108 if (flags) 2109 *flags |= LOG_CONT; 2110 } 2111 2112 prefix_len += 2; 2113 text += 2; 2114 } 2115 2116 return prefix_len; 2117 } 2118 2119 __printf(5, 0) 2120 static u16 printk_sprint(char *text, u16 size, int facility, 2121 enum printk_info_flags *flags, const char *fmt, 2122 va_list args) 2123 { 2124 u16 text_len; 2125 2126 text_len = vscnprintf(text, size, fmt, args); 2127 2128 /* Mark and strip a trailing newline. */ 2129 if (text_len && text[text_len - 1] == '\n') { 2130 text_len--; 2131 *flags |= LOG_NEWLINE; 2132 } 2133 2134 /* Strip log level and control flags. */ 2135 if (facility == 0) { 2136 u16 prefix_len; 2137 2138 prefix_len = printk_parse_prefix(text, NULL, NULL); 2139 if (prefix_len) { 2140 text_len -= prefix_len; 2141 memmove(text, text + prefix_len, text_len); 2142 } 2143 } 2144 2145 trace_console(text, text_len); 2146 2147 return text_len; 2148 } 2149 2150 __printf(4, 0) 2151 int vprintk_store(int facility, int level, 2152 const struct dev_printk_info *dev_info, 2153 const char *fmt, va_list args) 2154 { 2155 struct prb_reserved_entry e; 2156 enum printk_info_flags flags = 0; 2157 struct printk_record r; 2158 unsigned long irqflags; 2159 u16 trunc_msg_len = 0; 2160 char prefix_buf[8]; 2161 u8 *recursion_ptr; 2162 u16 reserve_size; 2163 va_list args2; 2164 u32 caller_id; 2165 u16 text_len; 2166 int ret = 0; 2167 u64 ts_nsec; 2168 2169 if (!printk_enter_irqsave(recursion_ptr, irqflags)) 2170 return 0; 2171 2172 /* 2173 * Since the duration of printk() can vary depending on the message 2174 * and state of the ringbuffer, grab the timestamp now so that it is 2175 * close to the call of printk(). This provides a more deterministic 2176 * timestamp with respect to the caller. 2177 */ 2178 ts_nsec = local_clock(); 2179 2180 caller_id = printk_caller_id(); 2181 2182 /* 2183 * The sprintf needs to come first since the syslog prefix might be 2184 * passed in as a parameter. An extra byte must be reserved so that 2185 * later the vscnprintf() into the reserved buffer has room for the 2186 * terminating '\0', which is not counted by vsnprintf(). 2187 */ 2188 va_copy(args2, args); 2189 reserve_size = vsnprintf(&prefix_buf[0], sizeof(prefix_buf), fmt, args2) + 1; 2190 va_end(args2); 2191 2192 if (reserve_size > PRINTKRB_RECORD_MAX) 2193 reserve_size = PRINTKRB_RECORD_MAX; 2194 2195 /* Extract log level or control flags. 
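 * Only messages generated inside the kernel (facility 0) embed such a prefix directly in the text.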
*/ 2196 if (facility == 0) 2197 printk_parse_prefix(&prefix_buf[0], &level, &flags); 2198 2199 if (level == LOGLEVEL_DEFAULT) 2200 level = default_message_loglevel; 2201 2202 if (dev_info) 2203 flags |= LOG_NEWLINE; 2204 2205 if (flags & LOG_CONT) { 2206 prb_rec_init_wr(&r, reserve_size); 2207 if (prb_reserve_in_last(&e, prb, &r, caller_id, PRINTKRB_RECORD_MAX)) { 2208 text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size, 2209 facility, &flags, fmt, args); 2210 r.info->text_len += text_len; 2211 2212 if (flags & LOG_NEWLINE) { 2213 r.info->flags |= LOG_NEWLINE; 2214 prb_final_commit(&e); 2215 } else { 2216 prb_commit(&e); 2217 } 2218 2219 ret = text_len; 2220 goto out; 2221 } 2222 } 2223 2224 /* 2225 * Explicitly initialize the record before every prb_reserve() call. 2226 * prb_reserve_in_last() and prb_reserve() purposely invalidate the 2227 * structure when they fail. 2228 */ 2229 prb_rec_init_wr(&r, reserve_size); 2230 if (!prb_reserve(&e, prb, &r)) { 2231 /* truncate the message if it is too long for empty buffer */ 2232 truncate_msg(&reserve_size, &trunc_msg_len); 2233 2234 prb_rec_init_wr(&r, reserve_size + trunc_msg_len); 2235 if (!prb_reserve(&e, prb, &r)) 2236 goto out; 2237 } 2238 2239 /* fill message */ 2240 text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args); 2241 if (trunc_msg_len) 2242 memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len); 2243 r.info->text_len = text_len + trunc_msg_len; 2244 r.info->facility = facility; 2245 r.info->level = level & 7; 2246 r.info->flags = flags & 0x1f; 2247 r.info->ts_nsec = ts_nsec; 2248 r.info->caller_id = caller_id; 2249 if (dev_info) 2250 memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); 2251 2252 /* A message without a trailing newline can be continued. */ 2253 if (!(flags & LOG_NEWLINE)) 2254 prb_commit(&e); 2255 else 2256 prb_final_commit(&e); 2257 2258 ret = text_len + trunc_msg_len; 2259 out: 2260 printk_exit_irqrestore(recursion_ptr, irqflags); 2261 return ret; 2262 } 2263 2264 asmlinkage int vprintk_emit(int facility, int level, 2265 const struct dev_printk_info *dev_info, 2266 const char *fmt, va_list args) 2267 { 2268 int printed_len; 2269 bool in_sched = false; 2270 2271 /* Suppress unimportant messages after panic happens */ 2272 if (unlikely(suppress_printk)) 2273 return 0; 2274 2275 if (unlikely(suppress_panic_printk) && 2276 atomic_read(&panic_cpu) != raw_smp_processor_id()) 2277 return 0; 2278 2279 if (level == LOGLEVEL_SCHED) { 2280 level = LOGLEVEL_DEFAULT; 2281 in_sched = true; 2282 } 2283 2284 printk_delay(level); 2285 2286 printed_len = vprintk_store(facility, level, dev_info, fmt, args); 2287 2288 /* If called from the scheduler, we can not call up(). */ 2289 if (!in_sched) { 2290 /* 2291 * The caller may be holding system-critical or 2292 * timing-sensitive locks. Disable preemption during 2293 * printing of all remaining records to all consoles so that 2294 * this context can return as soon as possible. Hopefully 2295 * another printk() caller will take over the printing. 2296 */ 2297 preempt_disable(); 2298 /* 2299 * Try to acquire and then immediately release the console 2300 * semaphore. The release will print out buffers. With the 2301 * spinning variant, this context tries to take over the 2302 * printing from another printing context. 
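 * If the trylock fails, the new record simply stays in the ringbuffer until some other context flushes the consoles.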
2303 */ 2304 if (console_trylock_spinning()) 2305 console_unlock(); 2306 preempt_enable(); 2307 } 2308 2309 wake_up_klogd(); 2310 return printed_len; 2311 } 2312 EXPORT_SYMBOL(vprintk_emit); 2313 2314 int vprintk_default(const char *fmt, va_list args) 2315 { 2316 return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); 2317 } 2318 EXPORT_SYMBOL_GPL(vprintk_default); 2319 2320 asmlinkage __visible int _printk(const char *fmt, ...) 2321 { 2322 va_list args; 2323 int r; 2324 2325 va_start(args, fmt); 2326 r = vprintk(fmt, args); 2327 va_end(args); 2328 2329 return r; 2330 } 2331 EXPORT_SYMBOL(_printk); 2332 2333 static bool pr_flush(int timeout_ms, bool reset_on_progress); 2334 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); 2335 2336 #else /* CONFIG_PRINTK */ 2337 2338 #define printk_time false 2339 2340 #define prb_read_valid(rb, seq, r) false 2341 #define prb_first_valid_seq(rb) 0 2342 #define prb_next_seq(rb) 0 2343 2344 static u64 syslog_seq; 2345 2346 static size_t record_print_text(const struct printk_record *r, 2347 bool syslog, bool time) 2348 { 2349 return 0; 2350 } 2351 static ssize_t info_print_ext_header(char *buf, size_t size, 2352 struct printk_info *info) 2353 { 2354 return 0; 2355 } 2356 static ssize_t msg_print_ext_body(char *buf, size_t size, 2357 char *text, size_t text_len, 2358 struct dev_printk_info *dev_info) { return 0; } 2359 static void console_lock_spinning_enable(void) { } 2360 static int console_lock_spinning_disable_and_check(int cookie) { return 0; } 2361 static bool suppress_message_printing(int level) { return false; } 2362 static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; } 2363 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } 2364 2365 #endif /* CONFIG_PRINTK */ 2366 2367 #ifdef CONFIG_EARLY_PRINTK 2368 struct console *early_console; 2369 2370 asmlinkage __visible void early_printk(const char *fmt, ...) 2371 { 2372 va_list ap; 2373 char buf[512]; 2374 int n; 2375 2376 if (!early_console) 2377 return; 2378 2379 va_start(ap, fmt); 2380 n = vscnprintf(buf, sizeof(buf), fmt, ap); 2381 va_end(ap); 2382 2383 early_console->write(early_console, buf, n); 2384 } 2385 #endif 2386 2387 static void set_user_specified(struct console_cmdline *c, bool user_specified) 2388 { 2389 if (!user_specified) 2390 return; 2391 2392 /* 2393 * @c console was defined by the user on the command line. 2394 * Do not clear when added twice also by SPCR or the device tree. 2395 */ 2396 c->user_specified = true; 2397 /* At least one console defined by the user on the command line. */ 2398 console_set_on_cmdline = 1; 2399 } 2400 2401 static int __add_preferred_console(char *name, int idx, char *options, 2402 char *brl_options, bool user_specified) 2403 { 2404 struct console_cmdline *c; 2405 int i; 2406 2407 /* 2408 * See if this tty is not yet registered, and 2409 * if we have a slot free. 
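 * An entry that already matches by name and index is updated in place rather than duplicated.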
2410 */ 2411 for (i = 0, c = console_cmdline; 2412 i < MAX_CMDLINECONSOLES && c->name[0]; 2413 i++, c++) { 2414 if (strcmp(c->name, name) == 0 && c->index == idx) { 2415 if (!brl_options) 2416 preferred_console = i; 2417 set_user_specified(c, user_specified); 2418 return 0; 2419 } 2420 } 2421 if (i == MAX_CMDLINECONSOLES) 2422 return -E2BIG; 2423 if (!brl_options) 2424 preferred_console = i; 2425 strscpy(c->name, name, sizeof(c->name)); 2426 c->options = options; 2427 set_user_specified(c, user_specified); 2428 braille_set_options(c, brl_options); 2429 2430 c->index = idx; 2431 return 0; 2432 } 2433 2434 static int __init console_msg_format_setup(char *str) 2435 { 2436 if (!strcmp(str, "syslog")) 2437 console_msg_format = MSG_FORMAT_SYSLOG; 2438 if (!strcmp(str, "default")) 2439 console_msg_format = MSG_FORMAT_DEFAULT; 2440 return 1; 2441 } 2442 __setup("console_msg_format=", console_msg_format_setup); 2443 2444 /* 2445 * Set up a console. Called via do_early_param() in init/main.c 2446 * for each "console=" parameter in the boot command line. 2447 */ 2448 static int __init console_setup(char *str) 2449 { 2450 char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */ 2451 char *s, *options, *brl_options = NULL; 2452 int idx; 2453 2454 /* 2455 * console="" or console=null have been suggested as a way to 2456 * disable console output. Use ttynull that has been created 2457 * for exactly this purpose. 2458 */ 2459 if (str[0] == 0 || strcmp(str, "null") == 0) { 2460 __add_preferred_console("ttynull", 0, NULL, NULL, true); 2461 return 1; 2462 } 2463 2464 if (_braille_console_setup(&str, &brl_options)) 2465 return 1; 2466 2467 /* 2468 * Decode str into name, index, options. 2469 */ 2470 if (str[0] >= '0' && str[0] <= '9') { 2471 strcpy(buf, "ttyS"); 2472 strncpy(buf + 4, str, sizeof(buf) - 5); 2473 } else { 2474 strncpy(buf, str, sizeof(buf) - 1); 2475 } 2476 buf[sizeof(buf) - 1] = 0; 2477 options = strchr(str, ','); 2478 if (options) 2479 *(options++) = 0; 2480 #ifdef __sparc__ 2481 if (!strcmp(str, "ttya")) 2482 strcpy(buf, "ttyS0"); 2483 if (!strcmp(str, "ttyb")) 2484 strcpy(buf, "ttyS1"); 2485 #endif 2486 for (s = buf; *s; s++) 2487 if (isdigit(*s) || *s == ',') 2488 break; 2489 idx = simple_strtoul(s, NULL, 10); 2490 *s = 0; 2491 2492 __add_preferred_console(buf, idx, options, brl_options, true); 2493 return 1; 2494 } 2495 __setup("console=", console_setup); 2496 2497 /** 2498 * add_preferred_console - add a device to the list of preferred consoles. 2499 * @name: device name 2500 * @idx: device index 2501 * @options: options for this console 2502 * 2503 * The last preferred console added will be used for kernel messages 2504 * and stdin/out/err for init. Normally this is used by console_setup 2505 * above to handle user-supplied console arguments; however it can also 2506 * be used by arch-specific code either to override the user or more 2507 * commonly to provide a default console (ie from PROM variables) when 2508 * the user has not supplied one. 
2509 */ 2510 int add_preferred_console(char *name, int idx, char *options) 2511 { 2512 return __add_preferred_console(name, idx, options, NULL, false); 2513 } 2514 2515 bool console_suspend_enabled = true; 2516 EXPORT_SYMBOL(console_suspend_enabled); 2517 2518 static int __init console_suspend_disable(char *str) 2519 { 2520 console_suspend_enabled = false; 2521 return 1; 2522 } 2523 __setup("no_console_suspend", console_suspend_disable); 2524 module_param_named(console_suspend, console_suspend_enabled, 2525 bool, S_IRUGO | S_IWUSR); 2526 MODULE_PARM_DESC(console_suspend, "suspend console during suspend" 2527 " and hibernate operations"); 2528 2529 static bool printk_console_no_auto_verbose; 2530 2531 void console_verbose(void) 2532 { 2533 if (console_loglevel && !printk_console_no_auto_verbose) 2534 console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; 2535 } 2536 EXPORT_SYMBOL_GPL(console_verbose); 2537 2538 module_param_named(console_no_auto_verbose, printk_console_no_auto_verbose, bool, 0644); 2539 MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to highest on oops/panic/etc"); 2540 2541 /** 2542 * suspend_console - suspend the console subsystem 2543 * 2544 * This disables printk() while we go into suspend states 2545 */ 2546 void suspend_console(void) 2547 { 2548 if (!console_suspend_enabled) 2549 return; 2550 pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); 2551 pr_flush(1000, true); 2552 console_lock(); 2553 console_suspended = 1; 2554 up_console_sem(); 2555 } 2556 2557 void resume_console(void) 2558 { 2559 if (!console_suspend_enabled) 2560 return; 2561 down_console_sem(); 2562 console_suspended = 0; 2563 console_unlock(); 2564 pr_flush(1000, true); 2565 } 2566 2567 /** 2568 * console_cpu_notify - print deferred console messages after CPU hotplug 2569 * @cpu: unused 2570 * 2571 * If printk() is called from a CPU that is not online yet, the messages 2572 * will be printed on the console only if there are CON_ANYTIME consoles. 2573 * This function is called when a new CPU comes online (or fails to come 2574 * up) or goes offline. 2575 */ 2576 static int console_cpu_notify(unsigned int cpu) 2577 { 2578 if (!cpuhp_tasks_frozen) { 2579 /* If trylock fails, someone else is doing the printing */ 2580 if (console_trylock()) 2581 console_unlock(); 2582 } 2583 return 0; 2584 } 2585 2586 /** 2587 * console_lock - block the console subsystem from printing 2588 * 2589 * Acquires a lock which guarantees that no consoles will 2590 * be in or enter their write() callback. 2591 * 2592 * Can sleep, returns nothing. 2593 */ 2594 void console_lock(void) 2595 { 2596 might_sleep(); 2597 2598 down_console_sem(); 2599 if (console_suspended) 2600 return; 2601 console_locked = 1; 2602 console_may_schedule = 1; 2603 } 2604 EXPORT_SYMBOL(console_lock); 2605 2606 /** 2607 * console_trylock - try to block the console subsystem from printing 2608 * 2609 * Try to acquire a lock which guarantees that no consoles will 2610 * be in or enter their write() callback. 2611 * 2612 * returns 1 on success, and 0 on failure to acquire the lock. 
2613 */ 2614 int console_trylock(void) 2615 { 2616 if (down_trylock_console_sem()) 2617 return 0; 2618 if (console_suspended) { 2619 up_console_sem(); 2620 return 0; 2621 } 2622 console_locked = 1; 2623 console_may_schedule = 0; 2624 return 1; 2625 } 2626 EXPORT_SYMBOL(console_trylock); 2627 2628 int is_console_locked(void) 2629 { 2630 return console_locked; 2631 } 2632 EXPORT_SYMBOL(is_console_locked); 2633 2634 /* 2635 * Return true when this CPU should unlock console_sem without pushing all 2636 * messages to the console. This reduces the chance that the console is 2637 * locked when the panic CPU tries to use it. 2638 */ 2639 static bool abandon_console_lock_in_panic(void) 2640 { 2641 if (!panic_in_progress()) 2642 return false; 2643 2644 /* 2645 * We can use raw_smp_processor_id() here because it is impossible for 2646 * the task to be migrated to the panic_cpu, or away from it. If 2647 * panic_cpu has already been set, and we're not currently executing on 2648 * that CPU, then we never will be. 2649 */ 2650 return atomic_read(&panic_cpu) != raw_smp_processor_id(); 2651 } 2652 2653 /* 2654 * Check if the given console is currently capable and allowed to print 2655 * records. 2656 * 2657 * Requires the console_srcu_read_lock. 2658 */ 2659 static inline bool console_is_usable(struct console *con) 2660 { 2661 short flags = console_srcu_read_flags(con); 2662 2663 if (!(flags & CON_ENABLED)) 2664 return false; 2665 2666 if (!con->write) 2667 return false; 2668 2669 /* 2670 * Console drivers may assume that per-cpu resources have been 2671 * allocated. So unless they're explicitly marked as being able to 2672 * cope (CON_ANYTIME) don't call them until this CPU is officially up. 2673 */ 2674 if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) 2675 return false; 2676 2677 return true; 2678 } 2679 2680 static void __console_unlock(void) 2681 { 2682 console_locked = 0; 2683 up_console_sem(); 2684 } 2685 2686 /* 2687 * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". This 2688 * is achieved by shifting the existing message over and inserting the dropped 2689 * message. 2690 * 2691 * @pmsg is the printk message to prepend. 2692 * 2693 * @dropped is the dropped count to report in the dropped message. 2694 * 2695 * If the message text in @pmsg->pbufs->outbuf does not have enough space for 2696 * the dropped message, the message text will be sufficiently truncated. 2697 * 2698 * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated. 2699 */ 2700 #ifdef CONFIG_PRINTK 2701 static void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) 2702 { 2703 struct printk_buffers *pbufs = pmsg->pbufs; 2704 const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); 2705 const size_t outbuf_sz = sizeof(pbufs->outbuf); 2706 char *scratchbuf = &pbufs->scratchbuf[0]; 2707 char *outbuf = &pbufs->outbuf[0]; 2708 size_t len; 2709 2710 len = scnprintf(scratchbuf, scratchbuf_sz, 2711 "** %lu printk messages dropped **\n", dropped); 2712 2713 /* 2714 * Make sure outbuf is sufficiently large before prepending. 2715 * Keep at least the prefix when the message must be truncated. 2716 * It is a rather theoretical problem when someone tries to 2717 * use a minimalist buffer. 2718 */ 2719 if (WARN_ON_ONCE(len + PRINTK_PREFIX_MAX >= outbuf_sz)) 2720 return; 2721 2722 if (pmsg->outbuf_len + len >= outbuf_sz) { 2723 /* Truncate the message, but keep it terminated. 
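 * Losing the tail of the original text is preferred over losing the dropped-message notice.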
*/ 2724 pmsg->outbuf_len = outbuf_sz - (len + 1); 2725 outbuf[pmsg->outbuf_len] = 0; 2726 } 2727 2728 memmove(outbuf + len, outbuf, pmsg->outbuf_len + 1); 2729 memcpy(outbuf, scratchbuf, len); 2730 pmsg->outbuf_len += len; 2731 } 2732 #else 2733 #define console_prepend_dropped(pmsg, dropped) 2734 #endif /* CONFIG_PRINTK */ 2735 2736 /* 2737 * Read and format the specified record (or a later record if the specified 2738 * record is not available). 2739 * 2740 * @pmsg will contain the formatted result. @pmsg->pbufs must point to a 2741 * struct printk_buffers. 2742 * 2743 * @seq is the record to read and format. If it is not available, the next 2744 * valid record is read. 2745 * 2746 * @is_extended specifies if the message should be formatted for extended 2747 * console output. 2748 * 2749 * @may_suppress specifies if records may be skipped based on loglevel. 2750 * 2751 * Returns false if no record is available. Otherwise true and all fields 2752 * of @pmsg are valid. (See the documentation of struct printk_message 2753 * for information about the @pmsg fields.) 2754 */ 2755 static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, 2756 bool is_extended, bool may_suppress) 2757 { 2758 static int panic_console_dropped; 2759 2760 struct printk_buffers *pbufs = pmsg->pbufs; 2761 const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); 2762 const size_t outbuf_sz = sizeof(pbufs->outbuf); 2763 char *scratchbuf = &pbufs->scratchbuf[0]; 2764 char *outbuf = &pbufs->outbuf[0]; 2765 struct printk_info info; 2766 struct printk_record r; 2767 size_t len = 0; 2768 2769 /* 2770 * Formatting extended messages requires a separate buffer, so use the 2771 * scratch buffer to read in the ringbuffer text. 2772 * 2773 * Formatting normal messages is done in-place, so read the ringbuffer 2774 * text directly into the output buffer. 2775 */ 2776 if (is_extended) 2777 prb_rec_init_rd(&r, &info, scratchbuf, scratchbuf_sz); 2778 else 2779 prb_rec_init_rd(&r, &info, outbuf, outbuf_sz); 2780 2781 if (!prb_read_valid(prb, seq, &r)) 2782 return false; 2783 2784 pmsg->seq = r.info->seq; 2785 pmsg->dropped = r.info->seq - seq; 2786 2787 /* 2788 * Check for dropped messages in panic here so that printk 2789 * suppression can occur as early as possible if necessary. 2790 */ 2791 if (pmsg->dropped && 2792 panic_in_progress() && 2793 panic_console_dropped++ > 10) { 2794 suppress_panic_printk = 1; 2795 pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n"); 2796 } 2797 2798 /* Skip record that has level above the console loglevel. */ 2799 if (may_suppress && suppress_message_printing(r.info->level)) 2800 goto out; 2801 2802 if (is_extended) { 2803 len = info_print_ext_header(outbuf, outbuf_sz, r.info); 2804 len += msg_print_ext_body(outbuf + len, outbuf_sz - len, 2805 &r.text_buf[0], r.info->text_len, &r.info->dev_info); 2806 } else { 2807 len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); 2808 } 2809 out: 2810 pmsg->outbuf_len = len; 2811 return true; 2812 } 2813 2814 /* 2815 * Print one record for the given console. The record printed is whatever 2816 * record is the next available record for the given console. 2817 * 2818 * @handover will be set to true if a printk waiter has taken over the 2819 * console_lock, in which case the caller is no longer holding both the 2820 * console_lock and the SRCU read lock. Otherwise it is set to false. 2821 * 2822 * @cookie is the cookie from the SRCU read lock.
2823 * 2824 * Returns false if the given console has no next record to print, otherwise 2825 * true. 2826 * 2827 * Requires the console_lock and the SRCU read lock. 2828 */ 2829 static bool console_emit_next_record(struct console *con, bool *handover, int cookie) 2830 { 2831 static struct printk_buffers pbufs; 2832 2833 bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; 2834 char *outbuf = &pbufs.outbuf[0]; 2835 struct printk_message pmsg = { 2836 .pbufs = &pbufs, 2837 }; 2838 unsigned long flags; 2839 2840 *handover = false; 2841 2842 if (!printk_get_next_message(&pmsg, con->seq, is_extended, true)) 2843 return false; 2844 2845 con->dropped += pmsg.dropped; 2846 2847 /* Skip messages of formatted length 0. */ 2848 if (pmsg.outbuf_len == 0) { 2849 con->seq = pmsg.seq + 1; 2850 goto skip; 2851 } 2852 2853 if (con->dropped && !is_extended) { 2854 console_prepend_dropped(&pmsg, con->dropped); 2855 con->dropped = 0; 2856 } 2857 2858 /* 2859 * While actively printing out messages, if another printk() 2860 * were to occur on another CPU, it may wait for this one to 2861 * finish. This task can not be preempted if there is a 2862 * waiter waiting to take over. 2863 * 2864 * Interrupts are disabled because the hand over to a waiter 2865 * must not be interrupted until the hand over is completed 2866 * (@console_waiter is cleared). 2867 */ 2868 printk_safe_enter_irqsave(flags); 2869 console_lock_spinning_enable(); 2870 2871 /* Do not trace print latency. */ 2872 stop_critical_timings(); 2873 2874 /* Write everything out to the hardware. */ 2875 con->write(con, outbuf, pmsg.outbuf_len); 2876 2877 start_critical_timings(); 2878 2879 con->seq = pmsg.seq + 1; 2880 2881 *handover = console_lock_spinning_disable_and_check(cookie); 2882 printk_safe_exit_irqrestore(flags); 2883 skip: 2884 return true; 2885 } 2886 2887 /* 2888 * Print out all remaining records to all consoles. 2889 * 2890 * @do_cond_resched is set by the caller. It can be true only in schedulable 2891 * context. 2892 * 2893 * @next_seq is set to the sequence number after the last available record. 2894 * The value is valid only when this function returns true. It means that all 2895 * usable consoles are completely flushed. 2896 * 2897 * @handover will be set to true if a printk waiter has taken over the 2898 * console_lock, in which case the caller is no longer holding the 2899 * console_lock. Otherwise it is set to false. 2900 * 2901 * Returns true when there was at least one usable console and all messages 2902 * were flushed to all usable consoles. A returned false informs the caller 2903 * that everything was not flushed (either there were no usable consoles or 2904 * another context has taken over printing or it is a panic situation and this 2905 * is not the panic CPU). Regardless the reason, the caller should assume it 2906 * is not useful to immediately try again. 2907 * 2908 * Requires the console_lock. 
2909 */ 2910 static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) 2911 { 2912 bool any_usable = false; 2913 struct console *con; 2914 bool any_progress; 2915 int cookie; 2916 2917 *next_seq = 0; 2918 *handover = false; 2919 2920 do { 2921 any_progress = false; 2922 2923 cookie = console_srcu_read_lock(); 2924 for_each_console_srcu(con) { 2925 bool progress; 2926 2927 if (!console_is_usable(con)) 2928 continue; 2929 any_usable = true; 2930 2931 progress = console_emit_next_record(con, handover, cookie); 2932 2933 /* 2934 * If a handover has occurred, the SRCU read lock 2935 * is already released. 2936 */ 2937 if (*handover) 2938 return false; 2939 2940 /* Track the next of the highest seq flushed. */ 2941 if (con->seq > *next_seq) 2942 *next_seq = con->seq; 2943 2944 if (!progress) 2945 continue; 2946 any_progress = true; 2947 2948 /* Allow panic_cpu to take over the consoles safely. */ 2949 if (abandon_console_lock_in_panic()) 2950 goto abandon; 2951 2952 if (do_cond_resched) 2953 cond_resched(); 2954 } 2955 console_srcu_read_unlock(cookie); 2956 } while (any_progress); 2957 2958 return any_usable; 2959 2960 abandon: 2961 console_srcu_read_unlock(cookie); 2962 return false; 2963 } 2964 2965 /** 2966 * console_unlock - unblock the console subsystem from printing 2967 * 2968 * Releases the console_lock which the caller holds to block printing of 2969 * the console subsystem. 2970 * 2971 * While the console_lock was held, console output may have been buffered 2972 * by printk(). If this is the case, console_unlock(); emits 2973 * the output prior to releasing the lock. 2974 * 2975 * console_unlock(); may be called from any context. 2976 */ 2977 void console_unlock(void) 2978 { 2979 bool do_cond_resched; 2980 bool handover; 2981 bool flushed; 2982 u64 next_seq; 2983 2984 if (console_suspended) { 2985 up_console_sem(); 2986 return; 2987 } 2988 2989 /* 2990 * Console drivers are called with interrupts disabled, so 2991 * @console_may_schedule should be cleared before; however, we may 2992 * end up dumping a lot of lines, for example, if called from 2993 * console registration path, and should invoke cond_resched() 2994 * between lines if allowable. Not doing so can cause a very long 2995 * scheduling stall on a slow console leading to RCU stall and 2996 * softlockup warnings which exacerbate the issue with more 2997 * messages practically incapacitating the system. Therefore, create 2998 * a local to use for the printing loop. 2999 */ 3000 do_cond_resched = console_may_schedule; 3001 3002 do { 3003 console_may_schedule = 0; 3004 3005 flushed = console_flush_all(do_cond_resched, &next_seq, &handover); 3006 if (!handover) 3007 __console_unlock(); 3008 3009 /* 3010 * Abort if there was a failure to flush all messages to all 3011 * usable consoles. Either it is not possible to flush (in 3012 * which case it would be an infinite loop of retrying) or 3013 * another context has taken over printing. 3014 */ 3015 if (!flushed) 3016 break; 3017 3018 /* 3019 * Some context may have added new records after 3020 * console_flush_all() but before unlocking the console. 3021 * Re-check if there is a new record to flush. If the trylock 3022 * fails, another context is already handling the printing. 
3023 */ 3024 } while (prb_read_valid(prb, next_seq, NULL) && console_trylock()); 3025 } 3026 EXPORT_SYMBOL(console_unlock); 3027 3028 /** 3029 * console_conditional_schedule - yield the CPU if required 3030 * 3031 * If the console code is currently allowed to sleep, and 3032 * if this CPU should yield the CPU to another task, do 3033 * so here. 3034 * 3035 * Must be called within console_lock();. 3036 */ 3037 void __sched console_conditional_schedule(void) 3038 { 3039 if (console_may_schedule) 3040 cond_resched(); 3041 } 3042 EXPORT_SYMBOL(console_conditional_schedule); 3043 3044 void console_unblank(void) 3045 { 3046 struct console *c; 3047 int cookie; 3048 3049 /* 3050 * Stop console printing because the unblank() callback may 3051 * assume the console is not within its write() callback. 3052 * 3053 * If @oops_in_progress is set, this may be an atomic context. 3054 * In that case, attempt a trylock as best-effort. 3055 */ 3056 if (oops_in_progress) { 3057 if (down_trylock_console_sem() != 0) 3058 return; 3059 } else 3060 console_lock(); 3061 3062 console_locked = 1; 3063 console_may_schedule = 0; 3064 3065 cookie = console_srcu_read_lock(); 3066 for_each_console_srcu(c) { 3067 if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) 3068 c->unblank(); 3069 } 3070 console_srcu_read_unlock(cookie); 3071 3072 console_unlock(); 3073 3074 if (!oops_in_progress) 3075 pr_flush(1000, true); 3076 } 3077 3078 /** 3079 * console_flush_on_panic - flush console content on panic 3080 * @mode: flush all messages in buffer or just the pending ones 3081 * 3082 * Immediately output all pending messages no matter what. 3083 */ 3084 void console_flush_on_panic(enum con_flush_mode mode) 3085 { 3086 /* 3087 * If someone else is holding the console lock, trylock will fail 3088 * and may_schedule may be set. Ignore and proceed to unlock so 3089 * that messages are flushed out. As this can be called from any 3090 * context and we don't want to get preempted while flushing, 3091 * ensure may_schedule is cleared. 3092 */ 3093 console_trylock(); 3094 console_may_schedule = 0; 3095 3096 if (mode == CONSOLE_REPLAY_ALL) { 3097 struct console *c; 3098 int cookie; 3099 u64 seq; 3100 3101 seq = prb_first_valid_seq(prb); 3102 3103 cookie = console_srcu_read_lock(); 3104 for_each_console_srcu(c) { 3105 /* 3106 * If the above console_trylock() failed, this is an 3107 * unsynchronized assignment. But in that case, the 3108 * kernel is in "hope and pray" mode anyway. 3109 */ 3110 c->seq = seq; 3111 } 3112 console_srcu_read_unlock(cookie); 3113 } 3114 console_unlock(); 3115 } 3116 3117 /* 3118 * Return the console tty driver structure and its associated index 3119 */ 3120 struct tty_driver *console_device(int *index) 3121 { 3122 struct console *c; 3123 struct tty_driver *driver = NULL; 3124 int cookie; 3125 3126 /* 3127 * Take console_lock to serialize device() callback with 3128 * other console operations. For example, fg_console is 3129 * modified under console_lock when switching vt. 3130 */ 3131 console_lock(); 3132 3133 cookie = console_srcu_read_lock(); 3134 for_each_console_srcu(c) { 3135 if (!c->device) 3136 continue; 3137 driver = c->device(c, index); 3138 if (driver) 3139 break; 3140 } 3141 console_srcu_read_unlock(cookie); 3142 3143 console_unlock(); 3144 return driver; 3145 } 3146 3147 /* 3148 * Prevent further output on the passed console device so that (for example) 3149 * serial drivers can disable console output before suspending a port, and can 3150 * re-enable output afterwards. 
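 * The matching console_start() call re-enables output on the console.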
3151 */ 3152 void console_stop(struct console *console) 3153 { 3154 __pr_flush(console, 1000, true); 3155 console_list_lock(); 3156 console_srcu_write_flags(console, console->flags & ~CON_ENABLED); 3157 console_list_unlock(); 3158 3159 /* 3160 * Ensure that all SRCU list walks have completed. All contexts must 3161 * be able to see that this console is disabled so that (for example) 3162 * the caller can suspend the port without risk of another context 3163 * using the port. 3164 */ 3165 synchronize_srcu(&console_srcu); 3166 } 3167 EXPORT_SYMBOL(console_stop); 3168 3169 void console_start(struct console *console) 3170 { 3171 console_list_lock(); 3172 console_srcu_write_flags(console, console->flags | CON_ENABLED); 3173 console_list_unlock(); 3174 __pr_flush(console, 1000, true); 3175 } 3176 EXPORT_SYMBOL(console_start); 3177 3178 static int __read_mostly keep_bootcon; 3179 3180 static int __init keep_bootcon_setup(char *str) 3181 { 3182 keep_bootcon = 1; 3183 pr_info("debug: skip boot console de-registration.\n"); 3184 3185 return 0; 3186 } 3187 3188 early_param("keep_bootcon", keep_bootcon_setup); 3189 3190 /* 3191 * This is called by register_console() to try to match 3192 * the newly registered console with any of the ones selected 3193 * by either the command line or add_preferred_console() and 3194 * setup/enable it. 3195 * 3196 * Care needs to be taken with consoles that are statically 3197 * enabled, such as netconsole. 3198 */ 3199 static int try_enable_preferred_console(struct console *newcon, 3200 bool user_specified) 3201 { 3202 struct console_cmdline *c; 3203 int i, err; 3204 3205 for (i = 0, c = console_cmdline; 3206 i < MAX_CMDLINECONSOLES && c->name[0]; 3207 i++, c++) { 3208 if (c->user_specified != user_specified) 3209 continue; 3210 if (!newcon->match || 3211 newcon->match(newcon, c->name, c->index, c->options) != 0) { 3212 /* default matching */ 3213 BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); 3214 if (strcmp(c->name, newcon->name) != 0) 3215 continue; 3216 if (newcon->index >= 0 && 3217 newcon->index != c->index) 3218 continue; 3219 if (newcon->index < 0) 3220 newcon->index = c->index; 3221 3222 if (_braille_register_console(newcon, c)) 3223 return 0; 3224 3225 if (newcon->setup && 3226 (err = newcon->setup(newcon, c->options)) != 0) 3227 return err; 3228 } 3229 newcon->flags |= CON_ENABLED; 3230 if (i == preferred_console) 3231 newcon->flags |= CON_CONSDEV; 3232 return 0; 3233 } 3234 3235 /* 3236 * Some consoles, such as pstore and netconsole, can be enabled even 3237 * without matching. Accept the pre-enabled consoles only when match() 3238 * and setup() had a chance to be called. 3239 */ 3240 if (newcon->flags & CON_ENABLED && c->user_specified == user_specified) 3241 return 0; 3242 3243 return -ENOENT; 3244 } 3245 3246 /* Try to enable the console unconditionally */ 3247 static void try_enable_default_console(struct console *newcon) 3248 { 3249 if (newcon->index < 0) 3250 newcon->index = 0; 3251 3252 if (newcon->setup && newcon->setup(newcon, NULL) != 0) 3253 return; 3254 3255 newcon->flags |= CON_ENABLED; 3256 3257 if (newcon->device) 3258 newcon->flags |= CON_CONSDEV; 3259 } 3260 3261 #define con_printk(lvl, con, fmt, ...) \ 3262 printk(lvl pr_fmt("%sconsole [%s%d] " fmt), \ 3263 (con->flags & CON_BOOT) ?
"boot" : "", \ 3264 con->name, con->index, ##__VA_ARGS__) 3265 3266 static void console_init_seq(struct console *newcon, bool bootcon_registered) 3267 { 3268 struct console *con; 3269 bool handover; 3270 3271 if (newcon->flags & (CON_PRINTBUFFER | CON_BOOT)) { 3272 /* Get a consistent copy of @syslog_seq. */ 3273 mutex_lock(&syslog_lock); 3274 newcon->seq = syslog_seq; 3275 mutex_unlock(&syslog_lock); 3276 } else { 3277 /* Begin with next message added to ringbuffer. */ 3278 newcon->seq = prb_next_seq(prb); 3279 3280 /* 3281 * If any enabled boot consoles are due to be unregistered 3282 * shortly, some may not be caught up and may be the same 3283 * device as @newcon. Since it is not known which boot console 3284 * is the same device, flush all consoles and, if necessary, 3285 * start with the message of the enabled boot console that is 3286 * the furthest behind. 3287 */ 3288 if (bootcon_registered && !keep_bootcon) { 3289 /* 3290 * Hold the console_lock to stop console printing and 3291 * guarantee safe access to console->seq. 3292 */ 3293 console_lock(); 3294 3295 /* 3296 * Flush all consoles and set the console to start at 3297 * the next unprinted sequence number. 3298 */ 3299 if (!console_flush_all(true, &newcon->seq, &handover)) { 3300 /* 3301 * Flushing failed. Just choose the lowest 3302 * sequence of the enabled boot consoles. 3303 */ 3304 3305 /* 3306 * If there was a handover, this context no 3307 * longer holds the console_lock. 3308 */ 3309 if (handover) 3310 console_lock(); 3311 3312 newcon->seq = prb_next_seq(prb); 3313 for_each_console(con) { 3314 if ((con->flags & CON_BOOT) && 3315 (con->flags & CON_ENABLED) && 3316 con->seq < newcon->seq) { 3317 newcon->seq = con->seq; 3318 } 3319 } 3320 } 3321 3322 console_unlock(); 3323 } 3324 } 3325 } 3326 3327 #define console_first() \ 3328 hlist_entry(console_list.first, struct console, node) 3329 3330 static int unregister_console_locked(struct console *console); 3331 3332 /* 3333 * The console driver calls this routine during kernel initialization 3334 * to register the console printing procedure with printk() and to 3335 * print any messages that were printed by the kernel before the 3336 * console driver was initialized. 3337 * 3338 * This can happen pretty early during the boot process (because of 3339 * early_printk) - sometimes before setup_arch() completes - be careful 3340 * of what kernel features are used - they may not be initialised yet. 3341 * 3342 * There are two types of consoles - bootconsoles (early_printk) and 3343 * "real" consoles (everything which is not a bootconsole) which are 3344 * handled differently. 3345 * - Any number of bootconsoles can be registered at any time. 3346 * - As soon as a "real" console is registered, all bootconsoles 3347 * will be unregistered automatically. 3348 * - Once a "real" console is registered, any attempt to register a 3349 * bootconsoles will be rejected 3350 */ 3351 void register_console(struct console *newcon) 3352 { 3353 struct console *con; 3354 bool bootcon_registered = false; 3355 bool realcon_registered = false; 3356 int err; 3357 3358 console_list_lock(); 3359 3360 for_each_console(con) { 3361 if (WARN(con == newcon, "console '%s%d' already registered\n", 3362 con->name, con->index)) { 3363 goto unlock; 3364 } 3365 3366 if (con->flags & CON_BOOT) 3367 bootcon_registered = true; 3368 else 3369 realcon_registered = true; 3370 } 3371 3372 /* Do not register boot consoles when there already is a real one. 
*/ 3373 if ((newcon->flags & CON_BOOT) && realcon_registered) { 3374 pr_info("Too late to register bootconsole %s%d\n", 3375 newcon->name, newcon->index); 3376 goto unlock; 3377 } 3378 3379 /* 3380 * See if we want to enable this console driver by default. 3381 * 3382 * Nope when a console is preferred by the command line, device 3383 * tree, or SPCR. 3384 * 3385 * The first real console with tty binding (driver) wins. More 3386 * consoles might get enabled before the right one is found. 3387 * 3388 * Note that a console with tty binding will have CON_CONSDEV 3389 * flag set and will be first in the list. 3390 */ 3391 if (preferred_console < 0) { 3392 if (hlist_empty(&console_list) || !console_first()->device || 3393 console_first()->flags & CON_BOOT) { 3394 try_enable_default_console(newcon); 3395 } 3396 } 3397 3398 /* See if this console matches one we selected on the command line */ 3399 err = try_enable_preferred_console(newcon, true); 3400 3401 /* If not, try to match against the platform default(s) */ 3402 if (err == -ENOENT) 3403 err = try_enable_preferred_console(newcon, false); 3404 3405 /* printk() messages are not printed to the Braille console. */ 3406 if (err || newcon->flags & CON_BRL) 3407 goto unlock; 3408 3409 /* 3410 * If we have a bootconsole, and are switching to a real console, 3411 * don't print everything out again, since when the boot console, and 3412 * the real console are the same physical device, it's annoying to 3413 * see the beginning boot messages twice 3414 */ 3415 if (bootcon_registered && 3416 ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) { 3417 newcon->flags &= ~CON_PRINTBUFFER; 3418 } 3419 3420 newcon->dropped = 0; 3421 console_init_seq(newcon, bootcon_registered); 3422 3423 /* 3424 * Put this console in the list - keep the 3425 * preferred driver at the head of the list. 3426 */ 3427 if (hlist_empty(&console_list)) { 3428 /* Ensure CON_CONSDEV is always set for the head. */ 3429 newcon->flags |= CON_CONSDEV; 3430 hlist_add_head_rcu(&newcon->node, &console_list); 3431 3432 } else if (newcon->flags & CON_CONSDEV) { 3433 /* Only the new head can have CON_CONSDEV set. */ 3434 console_srcu_write_flags(console_first(), console_first()->flags & ~CON_CONSDEV); 3435 hlist_add_head_rcu(&newcon->node, &console_list); 3436 3437 } else { 3438 hlist_add_behind_rcu(&newcon->node, console_list.first); 3439 } 3440 3441 /* 3442 * No need to synchronize SRCU here! The caller does not rely 3443 * on all contexts being able to see the new console before 3444 * register_console() completes. 3445 */ 3446 3447 console_sysfs_notify(); 3448 3449 /* 3450 * By unregistering the bootconsoles after we enable the real console 3451 * we get the "console xxx enabled" message on all the consoles - 3452 * boot consoles, real consoles, etc - this is to ensure that end 3453 * users know there might be something in the kernel's log buffer that 3454 * went to the bootconsole (that they do not see on the real console) 3455 */ 3456 con_printk(KERN_INFO, newcon, "enabled\n"); 3457 if (bootcon_registered && 3458 ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) && 3459 !keep_bootcon) { 3460 struct hlist_node *tmp; 3461 3462 hlist_for_each_entry_safe(con, tmp, &console_list, node) { 3463 if (con->flags & CON_BOOT) 3464 unregister_console_locked(con); 3465 } 3466 } 3467 unlock: 3468 console_list_unlock(); 3469 } 3470 EXPORT_SYMBOL(register_console); 3471 3472 /* Must be called under console_list_lock(). 
*/ 3473 static int unregister_console_locked(struct console *console) 3474 { 3475 int res; 3476 3477 lockdep_assert_console_list_lock_held(); 3478 3479 con_printk(KERN_INFO, console, "disabled\n"); 3480 3481 res = _braille_unregister_console(console); 3482 if (res < 0) 3483 return res; 3484 if (res > 0) 3485 return 0; 3486 3487 /* Disable it unconditionally */ 3488 console_srcu_write_flags(console, console->flags & ~CON_ENABLED); 3489 3490 if (!console_is_registered_locked(console)) 3491 return -ENODEV; 3492 3493 hlist_del_init_rcu(&console->node); 3494 3495 /* 3496 * <HISTORICAL> 3497 * If this isn't the last console and it has CON_CONSDEV set, we 3498 * need to set it on the next preferred console. 3499 * </HISTORICAL> 3500 * 3501 * The above makes no sense as there is no guarantee that the next 3502 * console has any device attached. Oh well.... 3503 */ 3504 if (!hlist_empty(&console_list) && console->flags & CON_CONSDEV) 3505 console_srcu_write_flags(console_first(), console_first()->flags | CON_CONSDEV); 3506 3507 /* 3508 * Ensure that all SRCU list walks have completed. All contexts 3509 * must not be able to see this console in the list so that any 3510 * exit/cleanup routines can be performed safely. 3511 */ 3512 synchronize_srcu(&console_srcu); 3513 3514 console_sysfs_notify(); 3515 3516 if (console->exit) 3517 res = console->exit(console); 3518 3519 return res; 3520 } 3521 3522 int unregister_console(struct console *console) 3523 { 3524 int res; 3525 3526 console_list_lock(); 3527 res = unregister_console_locked(console); 3528 console_list_unlock(); 3529 return res; 3530 } 3531 EXPORT_SYMBOL(unregister_console); 3532 3533 /** 3534 * console_force_preferred_locked - force a registered console preferred 3535 * @con: The registered console to force preferred. 3536 * 3537 * Must be called under console_list_lock(). 3538 */ 3539 void console_force_preferred_locked(struct console *con) 3540 { 3541 struct console *cur_pref_con; 3542 3543 if (!console_is_registered_locked(con)) 3544 return; 3545 3546 cur_pref_con = console_first(); 3547 3548 /* Already preferred? */ 3549 if (cur_pref_con == con) 3550 return; 3551 3552 /* 3553 * Delete, but do not re-initialize the entry. This allows the console 3554 * to continue to appear registered (via any hlist_unhashed_lockless() 3555 * checks), even though it was briefly removed from the console list. 3556 */ 3557 hlist_del_rcu(&con->node); 3558 3559 /* 3560 * Ensure that all SRCU list walks have completed so that the console 3561 * can be added to the beginning of the console list and its forward 3562 * list pointer can be re-initialized. 3563 */ 3564 synchronize_srcu(&console_srcu); 3565 3566 con->flags |= CON_CONSDEV; 3567 WARN_ON(!con->device); 3568 3569 /* Only the new head can have CON_CONSDEV set. */ 3570 console_srcu_write_flags(cur_pref_con, cur_pref_con->flags & ~CON_CONSDEV); 3571 hlist_add_head_rcu(&con->node, &console_list); 3572 } 3573 EXPORT_SYMBOL(console_force_preferred_locked); 3574 3575 /* 3576 * Initialize the console device. This is called *early*, so 3577 * we can't necessarily depend on lots of kernel help here. 3578 * Just do some early initializations, and do the complex setup 3579 * later. 3580 */ 3581 void __init console_init(void) 3582 { 3583 int ret; 3584 initcall_t call; 3585 initcall_entry_t *ce; 3586 3587 /* Setup the default TTY line discipline. */ 3588 n_tty_init(); 3589 3590 /* 3591 * set up the console device so that later boot sequences can 3592 * inform about problems etc.. 
3593 */ 3594 ce = __con_initcall_start; 3595 trace_initcall_level("console"); 3596 while (ce < __con_initcall_end) { 3597 call = initcall_from_entry(ce); 3598 trace_initcall_start(call); 3599 ret = call(); 3600 trace_initcall_finish(call, ret); 3601 ce++; 3602 } 3603 } 3604 3605 /* 3606 * Some boot consoles access data that is in the init section and which will 3607 * be discarded after the initcalls have been run. To make sure that no code 3608 * will access this data, unregister the boot consoles in a late initcall. 3609 * 3610 * If for some reason, such as deferred probe or the driver being a loadable 3611 * module, the real console hasn't registered yet at this point, there will 3612 * be a brief interval in which no messages are logged to the console, which 3613 * makes it difficult to diagnose problems that occur during this time. 3614 * 3615 * To mitigate this problem somewhat, only unregister consoles whose memory 3616 * intersects with the init section. Note that all other boot consoles will 3617 * get unregistered when the real preferred console is registered. 3618 */ 3619 static int __init printk_late_init(void) 3620 { 3621 struct hlist_node *tmp; 3622 struct console *con; 3623 int ret; 3624 3625 console_list_lock(); 3626 hlist_for_each_entry_safe(con, tmp, &console_list, node) { 3627 if (!(con->flags & CON_BOOT)) 3628 continue; 3629 3630 /* Check addresses that might be used for enabled consoles. */ 3631 if (init_section_intersects(con, sizeof(*con)) || 3632 init_section_contains(con->write, 0) || 3633 init_section_contains(con->read, 0) || 3634 init_section_contains(con->device, 0) || 3635 init_section_contains(con->unblank, 0) || 3636 init_section_contains(con->data, 0)) { 3637 /* 3638 * Please, consider moving the reported consoles out 3639 * of the init section. 3640 */ 3641 pr_warn("bootconsole [%s%d] uses init memory and must be disabled even before the real one is ready\n", 3642 con->name, con->index); 3643 unregister_console_locked(con); 3644 } 3645 } 3646 console_list_unlock(); 3647 3648 ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, 3649 console_cpu_notify); 3650 WARN_ON(ret < 0); 3651 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online", 3652 console_cpu_notify, NULL); 3653 WARN_ON(ret < 0); 3654 printk_sysctl_init(); 3655 return 0; 3656 } 3657 late_initcall(printk_late_init); 3658 3659 #if defined CONFIG_PRINTK 3660 /* If @con is specified, only wait for that console. Otherwise wait for all. */ 3661 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) 3662 { 3663 int remaining = timeout_ms; 3664 struct console *c; 3665 u64 last_diff = 0; 3666 u64 printk_seq; 3667 int cookie; 3668 u64 diff; 3669 u64 seq; 3670 3671 might_sleep(); 3672 3673 seq = prb_next_seq(prb); 3674 3675 for (;;) { 3676 diff = 0; 3677 3678 /* 3679 * Hold the console_lock to guarantee safe access to 3680 * console->seq and to prevent changes to @console_suspended 3681 * until all consoles have been processed. 3682 */ 3683 console_lock(); 3684 3685 cookie = console_srcu_read_lock(); 3686 for_each_console_srcu(c) { 3687 if (con && con != c) 3688 continue; 3689 if (!console_is_usable(c)) 3690 continue; 3691 printk_seq = c->seq; 3692 if (printk_seq < seq) 3693 diff += seq - printk_seq; 3694 } 3695 console_srcu_read_unlock(cookie); 3696 3697 /* 3698 * If consoles are suspended, it cannot be expected that they 3699 * make forward progress, so timeout immediately. @diff is 3700 * still used to return a valid flush status. 
3701 */ 3702 if (console_suspended) 3703 remaining = 0; 3704 else if (diff != last_diff && reset_on_progress) 3705 remaining = timeout_ms; 3706 3707 console_unlock(); 3708 3709 if (diff == 0 || remaining == 0) 3710 break; 3711 3712 if (remaining < 0) { 3713 /* no timeout limit */ 3714 msleep(100); 3715 } else if (remaining < 100) { 3716 msleep(remaining); 3717 remaining = 0; 3718 } else { 3719 msleep(100); 3720 remaining -= 100; 3721 } 3722 3723 last_diff = diff; 3724 } 3725 3726 return (diff == 0); 3727 } 3728 3729 /** 3730 * pr_flush() - Wait for printing threads to catch up. 3731 * 3732 * @timeout_ms: The maximum time (in ms) to wait. 3733 * @reset_on_progress: Reset the timeout if forward progress is seen. 3734 * 3735 * A value of 0 for @timeout_ms means no waiting will occur. A value of -1 3736 * represents infinite waiting. 3737 * 3738 * If @reset_on_progress is true, the timeout will be reset whenever any 3739 * printer has been seen to make some forward progress. 3740 * 3741 * Context: Process context. May sleep while acquiring console lock. 3742 * Return: true if all enabled printers are caught up. 3743 */ 3744 static bool pr_flush(int timeout_ms, bool reset_on_progress) 3745 { 3746 return __pr_flush(NULL, timeout_ms, reset_on_progress); 3747 } 3748 3749 /* 3750 * Delayed printk version, for scheduler-internal messages: 3751 */ 3752 #define PRINTK_PENDING_WAKEUP 0x01 3753 #define PRINTK_PENDING_OUTPUT 0x02 3754 3755 static DEFINE_PER_CPU(int, printk_pending); 3756 3757 static void wake_up_klogd_work_func(struct irq_work *irq_work) 3758 { 3759 int pending = this_cpu_xchg(printk_pending, 0); 3760 3761 if (pending & PRINTK_PENDING_OUTPUT) { 3762 /* If trylock fails, someone else is doing the printing */ 3763 if (console_trylock()) 3764 console_unlock(); 3765 } 3766 3767 if (pending & PRINTK_PENDING_WAKEUP) 3768 wake_up_interruptible(&log_wait); 3769 } 3770 3771 static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = 3772 IRQ_WORK_INIT_LAZY(wake_up_klogd_work_func); 3773 3774 static void __wake_up_klogd(int val) 3775 { 3776 if (!printk_percpu_data_ready()) 3777 return; 3778 3779 preempt_disable(); 3780 /* 3781 * Guarantee any new records can be seen by tasks preparing to wait 3782 * before this context checks if the wait queue is empty. 3783 * 3784 * The full memory barrier within wq_has_sleeper() pairs with the full 3785 * memory barrier within set_current_state() of 3786 * prepare_to_wait_event(), which is called after ___wait_event() adds 3787 * the waiter but before it has checked the wait condition. 3788 * 3789 * This pairs with devkmsg_read:A and syslog_print:A. 3790 */ 3791 if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */ 3792 (val & PRINTK_PENDING_OUTPUT)) { 3793 this_cpu_or(printk_pending, val); 3794 irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); 3795 } 3796 preempt_enable(); 3797 } 3798 3799 void wake_up_klogd(void) 3800 { 3801 __wake_up_klogd(PRINTK_PENDING_WAKEUP); 3802 } 3803 3804 void defer_console_output(void) 3805 { 3806 /* 3807 * New messages may have been added directly to the ringbuffer 3808 * using vprintk_store(), so wake any waiters as well. 
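 * The PRINTK_PENDING_OUTPUT bit additionally makes the irq_work handler flush the consoles via console_trylock()/console_unlock().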
3809 */ 3810 __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); 3811 } 3812 3813 void printk_trigger_flush(void) 3814 { 3815 defer_console_output(); 3816 } 3817 3818 int vprintk_deferred(const char *fmt, va_list args) 3819 { 3820 int r; 3821 3822 r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); 3823 defer_console_output(); 3824 3825 return r; 3826 } 3827 3828 int _printk_deferred(const char *fmt, ...) 3829 { 3830 va_list args; 3831 int r; 3832 3833 va_start(args, fmt); 3834 r = vprintk_deferred(fmt, args); 3835 va_end(args); 3836 3837 return r; 3838 } 3839 3840 /* 3841 * printk rate limiting, lifted from the networking subsystem. 3842 * 3843 * This enforces a rate limit: not more than 10 kernel messages 3844 * every 5s to make a denial-of-service attack impossible. 3845 */ 3846 DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); 3847 3848 int __printk_ratelimit(const char *func) 3849 { 3850 return ___ratelimit(&printk_ratelimit_state, func); 3851 } 3852 EXPORT_SYMBOL(__printk_ratelimit); 3853 3854 /** 3855 * printk_timed_ratelimit - caller-controlled printk ratelimiting 3856 * @caller_jiffies: pointer to caller's state 3857 * @interval_msecs: minimum interval between prints 3858 * 3859 * printk_timed_ratelimit() returns true if more than @interval_msecs 3860 * milliseconds have elapsed since the last time printk_timed_ratelimit() 3861 * returned true. 3862 */ 3863 bool printk_timed_ratelimit(unsigned long *caller_jiffies, 3864 unsigned int interval_msecs) 3865 { 3866 unsigned long elapsed = jiffies - *caller_jiffies; 3867 3868 if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs)) 3869 return false; 3870 3871 *caller_jiffies = jiffies; 3872 return true; 3873 } 3874 EXPORT_SYMBOL(printk_timed_ratelimit); 3875 3876 static DEFINE_SPINLOCK(dump_list_lock); 3877 static LIST_HEAD(dump_list); 3878 3879 /** 3880 * kmsg_dump_register - register a kernel log dumper. 3881 * @dumper: pointer to the kmsg_dumper structure 3882 * 3883 * Adds a kernel log dumper to the system. The dump callback in the 3884 * structure will be called when the kernel oopses or panics and must be 3885 * set. Returns zero on success and %-EINVAL or %-EBUSY otherwise. 3886 */ 3887 int kmsg_dump_register(struct kmsg_dumper *dumper) 3888 { 3889 unsigned long flags; 3890 int err = -EBUSY; 3891 3892 /* The dump callback needs to be set */ 3893 if (!dumper->dump) 3894 return -EINVAL; 3895 3896 spin_lock_irqsave(&dump_list_lock, flags); 3897 /* Don't allow registering multiple times */ 3898 if (!dumper->registered) { 3899 dumper->registered = 1; 3900 list_add_tail_rcu(&dumper->list, &dump_list); 3901 err = 0; 3902 } 3903 spin_unlock_irqrestore(&dump_list_lock, flags); 3904 3905 return err; 3906 } 3907 EXPORT_SYMBOL_GPL(kmsg_dump_register); 3908 3909 /** 3910 * kmsg_dump_unregister - unregister a kmsg dumper. 3911 * @dumper: pointer to the kmsg_dumper structure 3912 * 3913 * Removes a dump device from the system. Returns zero on success and 3914 * %-EINVAL otherwise. 

static DEFINE_SPINLOCK(dump_list_lock);
static LIST_HEAD(dump_list);

/**
 * kmsg_dump_register - register a kernel log dumper.
 * @dumper: pointer to the kmsg_dumper structure
 *
 * Adds a kernel log dumper to the system. The dump callback in the
 * structure must be set; it will be called when the kernel oopses or
 * panics. Returns zero on success and %-EINVAL or %-EBUSY otherwise.
 */
int kmsg_dump_register(struct kmsg_dumper *dumper)
{
	unsigned long flags;
	int err = -EBUSY;

	/* The dump callback needs to be set */
	if (!dumper->dump)
		return -EINVAL;

	spin_lock_irqsave(&dump_list_lock, flags);
	/* Don't allow registering multiple times */
	if (!dumper->registered) {
		dumper->registered = 1;
		list_add_tail_rcu(&dumper->list, &dump_list);
		err = 0;
	}
	spin_unlock_irqrestore(&dump_list_lock, flags);

	return err;
}
EXPORT_SYMBOL_GPL(kmsg_dump_register);

/**
 * kmsg_dump_unregister - unregister a kmsg dumper.
 * @dumper: pointer to the kmsg_dumper structure
 *
 * Removes a kernel log dumper from the system. Returns zero on success
 * and %-EINVAL otherwise.
 */
int kmsg_dump_unregister(struct kmsg_dumper *dumper)
{
	unsigned long flags;
	int err = -EINVAL;

	spin_lock_irqsave(&dump_list_lock, flags);
	if (dumper->registered) {
		dumper->registered = 0;
		list_del_rcu(&dumper->list);
		err = 0;
	}
	spin_unlock_irqrestore(&dump_list_lock, flags);
	synchronize_rcu();

	return err;
}
EXPORT_SYMBOL_GPL(kmsg_dump_unregister);

static bool always_kmsg_dump;
module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);

const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason)
{
	switch (reason) {
	case KMSG_DUMP_PANIC:
		return "Panic";
	case KMSG_DUMP_OOPS:
		return "Oops";
	case KMSG_DUMP_EMERG:
		return "Emergency";
	case KMSG_DUMP_SHUTDOWN:
		return "Shutdown";
	default:
		return "Unknown";
	}
}
EXPORT_SYMBOL_GPL(kmsg_dump_reason_str);

/**
 * kmsg_dump - dump kernel log to kernel message dumpers.
 * @reason: the reason (oops, panic, etc.) for dumping
 *
 * Call each registered dumper's dump() callback, which can retrieve
 * the kmsg records with kmsg_dump_get_line() or kmsg_dump_get_buffer().
 */
void kmsg_dump(enum kmsg_dump_reason reason)
{
	struct kmsg_dumper *dumper;

	rcu_read_lock();
	list_for_each_entry_rcu(dumper, &dump_list, list) {
		enum kmsg_dump_reason max_reason = dumper->max_reason;

		/*
		 * If client has not provided a specific max_reason, default
		 * to KMSG_DUMP_OOPS, unless always_kmsg_dump was set.
		 */
		if (max_reason == KMSG_DUMP_UNDEF) {
			max_reason = always_kmsg_dump ? KMSG_DUMP_MAX :
							KMSG_DUMP_OOPS;
		}
		if (reason > max_reason)
			continue;

		/* invoke dumper which will iterate over records */
		dumper->dump(dumper, reason);
	}
	rcu_read_unlock();
}
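
/*
 * Example (editor's illustrative sketch, not part of this file): a module
 * that wants a copy of the log on panic fills in a struct kmsg_dumper and
 * registers it. The names are hypothetical; example_dump() is sketched
 * after kmsg_dump_rewind() below:
 *
 *	static struct kmsg_dumper example_dumper = {
 *		.dump = example_dump,
 *		// Only react to panic dumps; other reasons are filtered
 *		// out by the max_reason check in kmsg_dump() above.
 *		.max_reason = KMSG_DUMP_PANIC,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return kmsg_dump_register(&example_dumper);
 *	}
 *
 *	static void __exit example_exit(void)
 *	{
 *		kmsg_dump_unregister(&example_dumper);
 *	}
 */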

/**
 * kmsg_dump_get_line - retrieve one kmsg log line
 * @iter: kmsg dump iterator
 * @syslog: include the syslog prefix (such as "<4>")
 * @line: buffer to copy the line to
 * @size: maximum size of the buffer
 * @len: length of line placed into buffer
 *
 * Start at the beginning of the kmsg buffer, with the oldest kmsg
 * record, and copy one record into the provided buffer.
 *
 * Consecutive calls will return the next available record moving
 * towards the end of the buffer with the youngest messages.
 *
 * A return value of FALSE indicates that there are no more records to
 * read.
 */
bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog,
			char *line, size_t size, size_t *len)
{
	u64 min_seq = latched_seq_read_nolock(&clear_seq);
	struct printk_info info;
	unsigned int line_count;
	struct printk_record r;
	size_t l = 0;
	bool ret = false;

	if (iter->cur_seq < min_seq)
		iter->cur_seq = min_seq;

	prb_rec_init_rd(&r, &info, line, size);

	/* Read text or count text lines? */
	if (line) {
		if (!prb_read_valid(prb, iter->cur_seq, &r))
			goto out;
		l = record_print_text(&r, syslog, printk_time);
	} else {
		if (!prb_read_valid_info(prb, iter->cur_seq,
					 &info, &line_count)) {
			goto out;
		}
		l = get_record_print_text_size(&info, line_count, syslog,
					       printk_time);
	}

	iter->cur_seq = r.info->seq + 1;
	ret = true;
out:
	if (len)
		*len = l;
	return ret;
}
EXPORT_SYMBOL_GPL(kmsg_dump_get_line);

/**
 * kmsg_dump_get_buffer - copy kmsg log lines
 * @iter: kmsg dump iterator
 * @syslog: include the syslog prefix (such as "<4>")
 * @buf: buffer to copy the records into
 * @size: maximum size of the buffer
 * @len_out: length of text placed into the buffer
 *
 * Start at the end of the kmsg buffer and fill the provided buffer
 * with as many of the *youngest* kmsg records as fit into it.
 * If the buffer is large enough, all available kmsg records will be
 * copied with a single call.
 *
 * Consecutive calls will fill the buffer with the next block of
 * available older records, not including the earlier retrieved ones.
 *
 * A return value of FALSE indicates that there are no more records to
 * read.
 */
bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog,
			  char *buf, size_t size, size_t *len_out)
{
	u64 min_seq = latched_seq_read_nolock(&clear_seq);
	struct printk_info info;
	struct printk_record r;
	u64 seq;
	u64 next_seq;
	size_t len = 0;
	bool ret = false;
	bool time = printk_time;

	if (!buf || !size)
		goto out;

	if (iter->cur_seq < min_seq)
		iter->cur_seq = min_seq;

	if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) {
		if (info.seq != iter->cur_seq) {
			/* messages are gone, move to first available one */
			iter->cur_seq = info.seq;
		}
	}

	/* last entry */
	if (iter->cur_seq >= iter->next_seq)
		goto out;

	/*
	 * Find first record that fits, including all following records,
	 * into the user-provided buffer for this dump. Pass in size-1
	 * because this function (by way of record_print_text()) will
	 * not write more than size-1 bytes of text into @buf.
	 */
	seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq,
				     size - 1, syslog, time);

	/*
	 * Next kmsg_dump_get_buffer() invocation will dump block of
	 * older records stored right before this one.
	 */
	next_seq = seq;

	prb_rec_init_rd(&r, &info, buf, size);

	len = 0;
	prb_for_each_record(seq, prb, seq, &r) {
		if (r.info->seq >= iter->next_seq)
			break;

		len += record_print_text(&r, syslog, time);

		/* Adjust record to store to remaining buffer space. */
		prb_rec_init_rd(&r, &info, buf + len, size - len);
	}

	iter->next_seq = next_seq;
	ret = true;
out:
	if (len_out)
		*len_out = len;
	return ret;
}
EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);

/**
 * kmsg_dump_rewind - reset the iterator
 * @iter: kmsg dump iterator
 *
 * Reset the dumper's iterator so that kmsg_dump_get_line() and
 * kmsg_dump_get_buffer() can be called again and used multiple
 * times within the same dumper.dump() callback.
 */
void kmsg_dump_rewind(struct kmsg_dump_iter *iter)
{
	iter->cur_seq = latched_seq_read_nolock(&clear_seq);
	iter->next_seq = prb_next_seq(prb);
}
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
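
/*
 * Example (editor's illustrative sketch, not part of this file): the dump()
 * callback registered in the earlier example walks the records with the
 * iterator helpers above. It rewinds once, then pulls one formatted line at
 * a time; example_write_to_storage() is a hypothetical persistence hook:
 *
 *	static void example_dump(struct kmsg_dumper *dumper,
 *				 enum kmsg_dump_reason reason)
 *	{
 *		static char line[1024];		// static: panic path, avoid stack use
 *		struct kmsg_dump_iter iter;
 *		size_t len;
 *
 *		kmsg_dump_rewind(&iter);
 *		while (kmsg_dump_get_line(&iter, true, line, sizeof(line), &len))
 *			example_write_to_storage(line, len);
 *	}
 *
 * A dumper with one large buffer would instead call kmsg_dump_get_buffer()
 * repeatedly, receiving the youngest records first and successively older
 * blocks on later calls.
 */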

#endif

#ifdef CONFIG_SMP
static atomic_t printk_cpu_sync_owner = ATOMIC_INIT(-1);
static atomic_t printk_cpu_sync_nested = ATOMIC_INIT(0);

/**
 * __printk_cpu_sync_wait() - Busy wait until the printk cpu-reentrant
 *                            spinning lock is not owned by any CPU.
 *
 * Context: Any context.
 */
void __printk_cpu_sync_wait(void)
{
	do {
		cpu_relax();
	} while (atomic_read(&printk_cpu_sync_owner) != -1);
}
EXPORT_SYMBOL(__printk_cpu_sync_wait);

/**
 * __printk_cpu_sync_try_get() - Try to acquire the printk cpu-reentrant
 *                               spinning lock.
 *
 * If no processor has the lock, the calling processor takes the lock and
 * becomes the owner. If the calling processor is already the owner of the
 * lock, this function succeeds immediately.
 *
 * Context: Any context. Expects interrupts to be disabled.
 * Return: 1 on success, otherwise 0.
 */
int __printk_cpu_sync_try_get(void)
{
	int cpu;
	int old;

	cpu = smp_processor_id();

	/*
	 * Guarantee loads and stores from this CPU when it is the lock owner
	 * are _not_ visible to the previous lock owner. This pairs with
	 * __printk_cpu_sync_put:B.
	 *
	 * Memory barrier involvement:
	 *
	 * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B,
	 * then __printk_cpu_sync_put:A can never read from
	 * __printk_cpu_sync_try_get:B.
	 *
	 * Relies on:
	 *
	 * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B
	 * of the previous CPU
	 *    matching
	 * ACQUIRE from __printk_cpu_sync_try_get:A to
	 * __printk_cpu_sync_try_get:B of this CPU
	 */
	old = atomic_cmpxchg_acquire(&printk_cpu_sync_owner, -1,
				     cpu); /* LMM(__printk_cpu_sync_try_get:A) */
	if (old == -1) {
		/*
		 * This CPU is now the owner and begins loading/storing
		 * data: LMM(__printk_cpu_sync_try_get:B)
		 */
		return 1;

	} else if (old == cpu) {
		/* This CPU is already the owner. */
		atomic_inc(&printk_cpu_sync_nested);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL(__printk_cpu_sync_try_get);

/**
 * __printk_cpu_sync_put() - Release the printk cpu-reentrant spinning lock.
 *
 * The calling processor must be the owner of the lock.
 *
 * Context: Any context. Expects interrupts to be disabled.
 */
void __printk_cpu_sync_put(void)
{
	if (atomic_read(&printk_cpu_sync_nested)) {
		atomic_dec(&printk_cpu_sync_nested);
		return;
	}

	/*
	 * This CPU is finished loading/storing data:
	 * LMM(__printk_cpu_sync_put:A)
	 */

	/*
	 * Guarantee loads and stores from this CPU when it was the
	 * lock owner are visible to the next lock owner. This pairs
	 * with __printk_cpu_sync_try_get:A.
	 *
	 * Memory barrier involvement:
	 *
	 * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B,
	 * then __printk_cpu_sync_try_get:B reads from __printk_cpu_sync_put:A.
	 *
	 * Relies on:
	 *
	 * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B
	 * of this CPU
	 *    matching
	 * ACQUIRE from __printk_cpu_sync_try_get:A to
	 * __printk_cpu_sync_try_get:B of the next CPU
	 */
	atomic_set_release(&printk_cpu_sync_owner,
			   -1); /* LMM(__printk_cpu_sync_put:B) */
}
EXPORT_SYMBOL(__printk_cpu_sync_put);
#endif /* CONFIG_SMP */
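
/*
 * Example (editor's illustrative sketch, not part of this file): the
 * try_get/put primitives above are normally used through the
 * printk_cpu_sync_get_irqsave() and printk_cpu_sync_put_irqrestore() macros
 * declared in <linux/printk.h>, which disable interrupts and spin via
 * __printk_cpu_sync_wait() until ownership is acquired. A typical use is
 * keeping multi-line diagnostics from interleaving across CPUs, roughly:
 *
 *	void example_dump_state(void)
 *	{
 *		unsigned long flags;
 *
 *		printk_cpu_sync_get_irqsave(flags);
 *		pr_info("example: begin state dump\n");
 *		pr_info("example: ...\n");
 *		printk_cpu_sync_put_irqrestore(flags);
 *	}
 *
 * The lock is CPU-reentrant but deliberately not a general-purpose lock:
 * it only serializes output and must not be used to protect data.
 */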