// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/kernel/printk.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * Modified to make sys_syslog() more flexible: added commands to
 * return the last 4k of kernel messages, regardless of whether
 * they've been read or not. Added option to suppress kernel printk's
 * to the console. Added hook for sending the console messages
 * elsewhere, in preparation for a serial line console (someday).
 * Ted Ts'o, 2/11/93.
 * Modified for sysctl support, 1/8/97, Chris Horn.
 * Fixed SMP synchronization, 08/08/99, Manfred Spraul
 *	manfred@colorfullife.com
 * Rewrote bits to get rid of console_lock
 *	01Mar01 Andrew Morton
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>
#include <linux/console.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/security.h>
#include <linux/memblock.h>
#include <linux/syscalls.h>
#include <linux/crash_core.h>
#include <linux/ratelimit.h>
#include <linux/kmsg_dump.h>
#include <linux/syslog.h>
#include <linux/cpu.h>
#include <linux/rculist.h>
#include <linux/poll.h>
#include <linux/irq_work.h>
#include <linux/ctype.h>
#include <linux/uio.h>
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>

#include <linux/uaccess.h>
#include <asm/sections.h>

#include <trace/events/initcall.h>
#define CREATE_TRACE_POINTS
#include <trace/events/printk.h>

#include "printk_ringbuffer.h"
#include "console_cmdline.h"
#include "braille.h"
#include "internal.h"

int console_printk[4] = {
	CONSOLE_LOGLEVEL_DEFAULT,	/* console_loglevel */
	MESSAGE_LOGLEVEL_DEFAULT,	/* default_message_loglevel */
	CONSOLE_LOGLEVEL_MIN,		/* minimum_console_loglevel */
	CONSOLE_LOGLEVEL_DEFAULT,	/* default_console_loglevel */
};
EXPORT_SYMBOL_GPL(console_printk);

atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0);
EXPORT_SYMBOL(ignore_console_lock_warning);

/*
 * Low level drivers may need that to know if they can schedule in
 * their unblank() callback or not. So let's export it.
 */
int oops_in_progress;
EXPORT_SYMBOL(oops_in_progress);

/*
 * console_mutex protects console_list updates and console->flags updates.
 * The flags are synchronized only for consoles that are registered, i.e.
 * accessible via the console list.
 */
static DEFINE_MUTEX(console_mutex);

/*
 * console_sem protects updates to console->seq and console_suspended,
 * and also provides serialization for console printing.
 */
static DEFINE_SEMAPHORE(console_sem);
HLIST_HEAD(console_list);
EXPORT_SYMBOL_GPL(console_list);
DEFINE_STATIC_SRCU(console_srcu);

/*
 * System may need to suppress printk message under certain
 * circumstances, like after kernel panic happens.
 */
int __read_mostly suppress_printk;

/*
 * During panic, heavy printk by other CPUs can delay the
 * panic and risk deadlock on console resources.
 */
static int __read_mostly suppress_panic_printk;

#ifdef CONFIG_LOCKDEP
static struct lockdep_map console_lock_dep_map = {
	.name = "console_lock"
};

void lockdep_assert_console_list_lock_held(void)
{
	lockdep_assert_held(&console_mutex);
}
EXPORT_SYMBOL(lockdep_assert_console_list_lock_held);
#endif

#ifdef CONFIG_DEBUG_LOCK_ALLOC
bool console_srcu_read_lock_is_held(void)
{
	return srcu_read_lock_held(&console_srcu);
}
EXPORT_SYMBOL(console_srcu_read_lock_is_held);
#endif

enum devkmsg_log_bits {
	__DEVKMSG_LOG_BIT_ON = 0,
	__DEVKMSG_LOG_BIT_OFF,
	__DEVKMSG_LOG_BIT_LOCK,
};

enum devkmsg_log_masks {
	DEVKMSG_LOG_MASK_ON   = BIT(__DEVKMSG_LOG_BIT_ON),
	DEVKMSG_LOG_MASK_OFF  = BIT(__DEVKMSG_LOG_BIT_OFF),
	DEVKMSG_LOG_MASK_LOCK = BIT(__DEVKMSG_LOG_BIT_LOCK),
};

/* Keep both the 'on' and 'off' bits clear, i.e. ratelimit by default: */
#define DEVKMSG_LOG_MASK_DEFAULT	0

static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;

static int __control_devkmsg(char *str)
{
	size_t len;

	if (!str)
		return -EINVAL;

	len = str_has_prefix(str, "on");
	if (len) {
		devkmsg_log = DEVKMSG_LOG_MASK_ON;
		return len;
	}

	len = str_has_prefix(str, "off");
	if (len) {
		devkmsg_log = DEVKMSG_LOG_MASK_OFF;
		return len;
	}

	len = str_has_prefix(str, "ratelimit");
	if (len) {
		devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;
		return len;
	}

	return -EINVAL;
}

static int __init control_devkmsg(char *str)
{
	if (__control_devkmsg(str) < 0) {
		pr_warn("printk.devkmsg: bad option string '%s'\n", str);
		return 1;
	}

	/*
	 * Set sysctl string accordingly:
	 */
	if (devkmsg_log == DEVKMSG_LOG_MASK_ON)
		strcpy(devkmsg_log_str, "on");
	else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF)
		strcpy(devkmsg_log_str, "off");
	/* else "ratelimit" which is set by default. */

	/*
	 * Sysctl cannot change it anymore. The kernel command line setting of
	 * this parameter is to force the setting to be permanent throughout the
	 * runtime of the system. This is a precautionary measure against userspace
	 * trying to be a smarta** and attempting to change it up on us.
	 */
	devkmsg_log |= DEVKMSG_LOG_MASK_LOCK;

	return 1;
}
__setup("printk.devkmsg=", control_devkmsg);

char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit";
#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	char old_str[DEVKMSG_STR_MAX_SIZE];
	unsigned int old;
	int err;

	if (write) {
		if (devkmsg_log & DEVKMSG_LOG_MASK_LOCK)
			return -EINVAL;

		old = devkmsg_log;
		strncpy(old_str, devkmsg_log_str, DEVKMSG_STR_MAX_SIZE);
	}

	err = proc_dostring(table, write, buffer, lenp, ppos);
	if (err)
		return err;

	if (write) {
		err = __control_devkmsg(devkmsg_log_str);

		/*
		 * Do not accept an unknown string OR a known string with
		 * trailing crap...
		 */
		if (err < 0 || (err + 1 != *lenp)) {

			/* ... and restore old setting. */
			devkmsg_log = old;
			strncpy(devkmsg_log_str, old_str, DEVKMSG_STR_MAX_SIZE);

			return -EINVAL;
		}
	}

	return 0;
}
#endif /* CONFIG_PRINTK && CONFIG_SYSCTL */

/**
 * console_list_lock - Lock the console list
 *
 * For console list or console->flags updates
 */
void console_list_lock(void)
{
	/*
	 * In unregister_console() and console_force_preferred_locked(),
	 * synchronize_srcu() is called with the console_list_lock held.
	 * Therefore it is not allowed that the console_list_lock is taken
	 * with the srcu_lock held.
	 *
	 * Detecting if this context is really in the read-side critical
	 * section is only possible if the appropriate debug options are
	 * enabled.
	 */
	WARN_ON_ONCE(debug_lockdep_rcu_enabled() &&
		     srcu_read_lock_held(&console_srcu));

	mutex_lock(&console_mutex);
}
EXPORT_SYMBOL(console_list_lock);

/**
 * console_list_unlock - Unlock the console list
 *
 * Counterpart to console_list_lock()
 */
void console_list_unlock(void)
{
	mutex_unlock(&console_mutex);
}
EXPORT_SYMBOL(console_list_unlock);

/**
 * console_srcu_read_lock - Register a new reader for the
 *	SRCU-protected console list
 *
 * Use for_each_console_srcu() to iterate the console list
 *
 * Context: Any context.
 * Return: A cookie to pass to console_srcu_read_unlock().
 */
int console_srcu_read_lock(void)
{
	return srcu_read_lock_nmisafe(&console_srcu);
}
EXPORT_SYMBOL(console_srcu_read_lock);

/**
 * console_srcu_read_unlock - Unregister an old reader from
 *	the SRCU-protected console list
 * @cookie: cookie returned from console_srcu_read_lock()
 *
 * Counterpart to console_srcu_read_lock()
 */
void console_srcu_read_unlock(int cookie)
{
	srcu_read_unlock_nmisafe(&console_srcu, cookie);
}
EXPORT_SYMBOL(console_srcu_read_unlock);

/*
 * Helper macros to handle lockdep when locking/unlocking console_sem. We use
 * macros instead of functions so that _RET_IP_ contains useful information.
 */
#define down_console_sem() do { \
	down(&console_sem);\
	mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\
} while (0)

static int __down_trylock_console_sem(unsigned long ip)
{
	int lock_failed;
	unsigned long flags;

	/*
	 * Here and in __up_console_sem() we need to be in safe mode,
	 * because spindump/WARN/etc from under console ->lock will
	 * deadlock in printk()->down_trylock_console_sem() otherwise.
	 */
	printk_safe_enter_irqsave(flags);
	lock_failed = down_trylock(&console_sem);
	printk_safe_exit_irqrestore(flags);

	if (lock_failed)
		return 1;
	mutex_acquire(&console_lock_dep_map, 0, 1, ip);
	return 0;
}
#define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_)

static void __up_console_sem(unsigned long ip)
{
	unsigned long flags;

	mutex_release(&console_lock_dep_map, ip);

	printk_safe_enter_irqsave(flags);
	up(&console_sem);
	printk_safe_exit_irqrestore(flags);
}
#define up_console_sem() __up_console_sem(_RET_IP_)

static bool panic_in_progress(void)
{
	return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
}

/*
 * This is used for debugging the mess that is the VT code by
 * keeping track if we have the console semaphore held. It's
 * definitely not the perfect debug tool (we don't know if _WE_
 * hold it and are racing, but it helps tracking those weird code
 * paths in the console code where we end up in places I want
 * locked without the console semaphore held).
 */
static int console_locked, console_suspended;

/*
 * Array of consoles built from command line options (console=)
 */

#define MAX_CMDLINECONSOLES 8

static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];

static int preferred_console = -1;
int console_set_on_cmdline;
EXPORT_SYMBOL(console_set_on_cmdline);

/* Flag: console code may call schedule() */
static int console_may_schedule;

enum con_msg_format_flags {
	MSG_FORMAT_DEFAULT	= 0,
	MSG_FORMAT_SYSLOG	= (1 << 0),
};

static int console_msg_format = MSG_FORMAT_DEFAULT;

/*
 * The printk log buffer consists of a sequenced collection of records, each
 * containing variable length message text. Every record also contains its
 * own meta-data (@info).
 *
 * Every record meta-data carries the timestamp in microseconds, as well as
 * the standard userspace syslog level and syslog facility. The usual kernel
 * messages use LOG_KERN; userspace-injected messages always carry a matching
 * syslog facility, by default LOG_USER. The origin of every message can be
 * reliably determined that way.
 *
 * The human readable log message of a record is available in @text, the
 * length of the message text in @text_len. The stored message is not
 * terminated.
 *
 * Optionally, a record can carry a dictionary of properties (key/value
 * pairs), to provide userspace with a machine-readable message context.
 *
 * Examples for well-defined, commonly used property names are:
 *   DEVICE=b12:8		device identifier
 *				  b12:8		block dev_t
 *				  c127:3	char dev_t
 *				  n8		netdev ifindex
 *				  +sound:card0	subsystem:devname
 *   SUBSYSTEM=pci		driver-core subsystem name
 *
 * Valid characters in property names are [a-zA-Z0-9.-_]. Property names
 * and values are terminated by a '\0' character.
 *
 * Example of record values:
 *   record.text_buf                = "it's a line" (unterminated)
 *   record.info.seq                = 56
 *   record.info.ts_nsec            = 36863
 *   record.info.text_len           = 11
 *   record.info.facility           = 0 (LOG_KERN)
 *   record.info.flags              = 0
 *   record.info.level              = 3 (LOG_ERR)
 *   record.info.caller_id          = 299 (task 299)
 *   record.info.dev_info.subsystem = "pci" (terminated)
 *   record.info.dev_info.device    = "+pci:0000:00:01.0" (terminated)
 *
 * The 'struct printk_info' buffer must never be directly exported to
 * userspace, it is a kernel-private implementation detail that might
 * need to be changed in the future, when the requirements change.
 *
 * /dev/kmsg exports the structured data in the following line format:
 *   "<level>,<seqnum>,<timestamp>,<contflag>[,additional_values, ... ];<message text>\n"
 *
 * Users of the export format should ignore possible additional values
 * separated by ',', and find the message after the ';' character.
 *
 * The optional key/value pairs are attached as continuation lines starting
 * with a space character and terminated by a newline. All possible
 * non-printable characters are escaped in the "\xff" notation.
 */

/* syslog_lock protects syslog_* variables and write access to clear_seq.
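 * Readers of clear_seq use the lockless latched_seq helpers defined below,
 * so only writers of clear_seq need to hold syslog_lock.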
*/ 443 static DEFINE_MUTEX(syslog_lock); 444 445 #ifdef CONFIG_PRINTK 446 DECLARE_WAIT_QUEUE_HEAD(log_wait); 447 /* All 3 protected by @syslog_lock. */ 448 /* the next printk record to read by syslog(READ) or /proc/kmsg */ 449 static u64 syslog_seq; 450 static size_t syslog_partial; 451 static bool syslog_time; 452 453 struct latched_seq { 454 seqcount_latch_t latch; 455 u64 val[2]; 456 }; 457 458 /* 459 * The next printk record to read after the last 'clear' command. There are 460 * two copies (updated with seqcount_latch) so that reads can locklessly 461 * access a valid value. Writers are synchronized by @syslog_lock. 462 */ 463 static struct latched_seq clear_seq = { 464 .latch = SEQCNT_LATCH_ZERO(clear_seq.latch), 465 .val[0] = 0, 466 .val[1] = 0, 467 }; 468 469 #ifdef CONFIG_PRINTK_CALLER 470 #define PREFIX_MAX 48 471 #else 472 #define PREFIX_MAX 32 473 #endif 474 475 /* the maximum size of a formatted record (i.e. with prefix added per line) */ 476 #define CONSOLE_LOG_MAX 1024 477 478 /* the maximum size for a dropped text message */ 479 #define DROPPED_TEXT_MAX 64 480 481 /* the maximum size allowed to be reserved for a record */ 482 #define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX) 483 484 #define LOG_LEVEL(v) ((v) & 0x07) 485 #define LOG_FACILITY(v) ((v) >> 3 & 0xff) 486 487 /* record buffer */ 488 #define LOG_ALIGN __alignof__(unsigned long) 489 #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) 490 #define LOG_BUF_LEN_MAX (u32)(1 << 31) 491 static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); 492 static char *log_buf = __log_buf; 493 static u32 log_buf_len = __LOG_BUF_LEN; 494 495 /* 496 * Define the average message size. This only affects the number of 497 * descriptors that will be available. Underestimating is better than 498 * overestimating (too many available descriptors is better than not enough). 499 */ 500 #define PRB_AVGBITS 5 /* 32 character average length */ 501 502 #if CONFIG_LOG_BUF_SHIFT <= PRB_AVGBITS 503 #error CONFIG_LOG_BUF_SHIFT value too small. 504 #endif 505 _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, 506 PRB_AVGBITS, &__log_buf[0]); 507 508 static struct printk_ringbuffer printk_rb_dynamic; 509 510 static struct printk_ringbuffer *prb = &printk_rb_static; 511 512 /* 513 * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before 514 * per_cpu_areas are initialised. This variable is set to true when 515 * it's safe to access per-CPU data. 516 */ 517 static bool __printk_percpu_data_ready __ro_after_init; 518 519 bool printk_percpu_data_ready(void) 520 { 521 return __printk_percpu_data_ready; 522 } 523 524 /* Must be called under syslog_lock. */ 525 static void latched_seq_write(struct latched_seq *ls, u64 val) 526 { 527 raw_write_seqcount_latch(&ls->latch); 528 ls->val[0] = val; 529 raw_write_seqcount_latch(&ls->latch); 530 ls->val[1] = val; 531 } 532 533 /* Can be called from any context. */ 534 static u64 latched_seq_read_nolock(struct latched_seq *ls) 535 { 536 unsigned int seq; 537 unsigned int idx; 538 u64 val; 539 540 do { 541 seq = raw_read_seqcount_latch(&ls->latch); 542 idx = seq & 0x1; 543 val = ls->val[idx]; 544 } while (read_seqcount_latch_retry(&ls->latch, seq)); 545 546 return val; 547 } 548 549 /* Return log buffer address */ 550 char *log_buf_addr_get(void) 551 { 552 return log_buf; 553 } 554 555 /* Return log buffer size */ 556 u32 log_buf_len_get(void) 557 { 558 return log_buf_len; 559 } 560 561 /* 562 * Define how much of the log buffer we could take at maximum. 
The value 563 * must be greater than two. Note that only half of the buffer is available 564 * when the index points to the middle. 565 */ 566 #define MAX_LOG_TAKE_PART 4 567 static const char trunc_msg[] = "<truncated>"; 568 569 static void truncate_msg(u16 *text_len, u16 *trunc_msg_len) 570 { 571 /* 572 * The message should not take the whole buffer. Otherwise, it might 573 * get removed too soon. 574 */ 575 u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART; 576 577 if (*text_len > max_text_len) 578 *text_len = max_text_len; 579 580 /* enable the warning message (if there is room) */ 581 *trunc_msg_len = strlen(trunc_msg); 582 if (*text_len >= *trunc_msg_len) 583 *text_len -= *trunc_msg_len; 584 else 585 *trunc_msg_len = 0; 586 } 587 588 int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT); 589 590 static int syslog_action_restricted(int type) 591 { 592 if (dmesg_restrict) 593 return 1; 594 /* 595 * Unless restricted, we allow "read all" and "get buffer size" 596 * for everybody. 597 */ 598 return type != SYSLOG_ACTION_READ_ALL && 599 type != SYSLOG_ACTION_SIZE_BUFFER; 600 } 601 602 static int check_syslog_permissions(int type, int source) 603 { 604 /* 605 * If this is from /proc/kmsg and we've already opened it, then we've 606 * already done the capabilities checks at open time. 607 */ 608 if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN) 609 goto ok; 610 611 if (syslog_action_restricted(type)) { 612 if (capable(CAP_SYSLOG)) 613 goto ok; 614 /* 615 * For historical reasons, accept CAP_SYS_ADMIN too, with 616 * a warning. 617 */ 618 if (capable(CAP_SYS_ADMIN)) { 619 pr_warn_once("%s (%d): Attempt to access syslog with " 620 "CAP_SYS_ADMIN but no CAP_SYSLOG " 621 "(deprecated).\n", 622 current->comm, task_pid_nr(current)); 623 goto ok; 624 } 625 return -EPERM; 626 } 627 ok: 628 return security_syslog(type); 629 } 630 631 static void append_char(char **pp, char *e, char c) 632 { 633 if (*pp < e) 634 *(*pp)++ = c; 635 } 636 637 static ssize_t info_print_ext_header(char *buf, size_t size, 638 struct printk_info *info) 639 { 640 u64 ts_usec = info->ts_nsec; 641 char caller[20]; 642 #ifdef CONFIG_PRINTK_CALLER 643 u32 id = info->caller_id; 644 645 snprintf(caller, sizeof(caller), ",caller=%c%u", 646 id & 0x80000000 ? 'C' : 'T', id & ~0x80000000); 647 #else 648 caller[0] = '\0'; 649 #endif 650 651 do_div(ts_usec, 1000); 652 653 return scnprintf(buf, size, "%u,%llu,%llu,%c%s;", 654 (info->facility << 3) | info->level, info->seq, 655 ts_usec, info->flags & LOG_CONT ? 
'c' : '-', caller); 656 } 657 658 static ssize_t msg_add_ext_text(char *buf, size_t size, 659 const char *text, size_t text_len, 660 unsigned char endc) 661 { 662 char *p = buf, *e = buf + size; 663 size_t i; 664 665 /* escape non-printable characters */ 666 for (i = 0; i < text_len; i++) { 667 unsigned char c = text[i]; 668 669 if (c < ' ' || c >= 127 || c == '\\') 670 p += scnprintf(p, e - p, "\\x%02x", c); 671 else 672 append_char(&p, e, c); 673 } 674 append_char(&p, e, endc); 675 676 return p - buf; 677 } 678 679 static ssize_t msg_add_dict_text(char *buf, size_t size, 680 const char *key, const char *val) 681 { 682 size_t val_len = strlen(val); 683 ssize_t len; 684 685 if (!val_len) 686 return 0; 687 688 len = msg_add_ext_text(buf, size, "", 0, ' '); /* dict prefix */ 689 len += msg_add_ext_text(buf + len, size - len, key, strlen(key), '='); 690 len += msg_add_ext_text(buf + len, size - len, val, val_len, '\n'); 691 692 return len; 693 } 694 695 static ssize_t msg_print_ext_body(char *buf, size_t size, 696 char *text, size_t text_len, 697 struct dev_printk_info *dev_info) 698 { 699 ssize_t len; 700 701 len = msg_add_ext_text(buf, size, text, text_len, '\n'); 702 703 if (!dev_info) 704 goto out; 705 706 len += msg_add_dict_text(buf + len, size - len, "SUBSYSTEM", 707 dev_info->subsystem); 708 len += msg_add_dict_text(buf + len, size - len, "DEVICE", 709 dev_info->device); 710 out: 711 return len; 712 } 713 714 /* /dev/kmsg - userspace message inject/listen interface */ 715 struct devkmsg_user { 716 atomic64_t seq; 717 struct ratelimit_state rs; 718 struct mutex lock; 719 char buf[CONSOLE_EXT_LOG_MAX]; 720 721 struct printk_info info; 722 char text_buf[CONSOLE_EXT_LOG_MAX]; 723 struct printk_record record; 724 }; 725 726 static __printf(3, 4) __cold 727 int devkmsg_emit(int facility, int level, const char *fmt, ...) 728 { 729 va_list args; 730 int r; 731 732 va_start(args, fmt); 733 r = vprintk_emit(facility, level, NULL, fmt, args); 734 va_end(args); 735 736 return r; 737 } 738 739 static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) 740 { 741 char *buf, *line; 742 int level = default_message_loglevel; 743 int facility = 1; /* LOG_USER */ 744 struct file *file = iocb->ki_filp; 745 struct devkmsg_user *user = file->private_data; 746 size_t len = iov_iter_count(from); 747 ssize_t ret = len; 748 749 if (!user || len > LOG_LINE_MAX) 750 return -EINVAL; 751 752 /* Ignore when user logging is disabled. */ 753 if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) 754 return len; 755 756 /* Ratelimit when not explicitly enabled. */ 757 if (!(devkmsg_log & DEVKMSG_LOG_MASK_ON)) { 758 if (!___ratelimit(&user->rs, current->comm)) 759 return ret; 760 } 761 762 buf = kmalloc(len+1, GFP_KERNEL); 763 if (buf == NULL) 764 return -ENOMEM; 765 766 buf[len] = '\0'; 767 if (!copy_from_iter_full(buf, len, from)) { 768 kfree(buf); 769 return -EFAULT; 770 } 771 772 /* 773 * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace 774 * the decimal value represents 32bit, the lower 3 bit are the log 775 * level, the rest are the log facility. 776 * 777 * If no prefix or no userspace facility is specified, we 778 * enforce LOG_USER, to be able to reliably distinguish 779 * kernel-generated messages from userspace-injected ones. 
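 *
 * For example, a userspace write of "<13>hello" is stored with level 5
 * (LOG_NOTICE) and facility 1 (LOG_USER), since 13 == (1 << 3) | 5,
 * while a bare "hello" keeps the defaults set above.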
780 */ 781 line = buf; 782 if (line[0] == '<') { 783 char *endp = NULL; 784 unsigned int u; 785 786 u = simple_strtoul(line + 1, &endp, 10); 787 if (endp && endp[0] == '>') { 788 level = LOG_LEVEL(u); 789 if (LOG_FACILITY(u) != 0) 790 facility = LOG_FACILITY(u); 791 endp++; 792 line = endp; 793 } 794 } 795 796 devkmsg_emit(facility, level, "%s", line); 797 kfree(buf); 798 return ret; 799 } 800 801 static ssize_t devkmsg_read(struct file *file, char __user *buf, 802 size_t count, loff_t *ppos) 803 { 804 struct devkmsg_user *user = file->private_data; 805 struct printk_record *r = &user->record; 806 size_t len; 807 ssize_t ret; 808 809 if (!user) 810 return -EBADF; 811 812 ret = mutex_lock_interruptible(&user->lock); 813 if (ret) 814 return ret; 815 816 if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) { 817 if (file->f_flags & O_NONBLOCK) { 818 ret = -EAGAIN; 819 goto out; 820 } 821 822 /* 823 * Guarantee this task is visible on the waitqueue before 824 * checking the wake condition. 825 * 826 * The full memory barrier within set_current_state() of 827 * prepare_to_wait_event() pairs with the full memory barrier 828 * within wq_has_sleeper(). 829 * 830 * This pairs with __wake_up_klogd:A. 831 */ 832 ret = wait_event_interruptible(log_wait, 833 prb_read_valid(prb, 834 atomic64_read(&user->seq), r)); /* LMM(devkmsg_read:A) */ 835 if (ret) 836 goto out; 837 } 838 839 if (r->info->seq != atomic64_read(&user->seq)) { 840 /* our last seen message is gone, return error and reset */ 841 atomic64_set(&user->seq, r->info->seq); 842 ret = -EPIPE; 843 goto out; 844 } 845 846 len = info_print_ext_header(user->buf, sizeof(user->buf), r->info); 847 len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len, 848 &r->text_buf[0], r->info->text_len, 849 &r->info->dev_info); 850 851 atomic64_set(&user->seq, r->info->seq + 1); 852 853 if (len > count) { 854 ret = -EINVAL; 855 goto out; 856 } 857 858 if (copy_to_user(buf, user->buf, len)) { 859 ret = -EFAULT; 860 goto out; 861 } 862 ret = len; 863 out: 864 mutex_unlock(&user->lock); 865 return ret; 866 } 867 868 /* 869 * Be careful when modifying this function!!! 870 * 871 * Only few operations are supported because the device works only with the 872 * entire variable length messages (records). Non-standard values are 873 * returned in the other cases and has been this way for quite some time. 874 * User space applications might depend on this behavior. 875 */ 876 static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) 877 { 878 struct devkmsg_user *user = file->private_data; 879 loff_t ret = 0; 880 881 if (!user) 882 return -EBADF; 883 if (offset) 884 return -ESPIPE; 885 886 switch (whence) { 887 case SEEK_SET: 888 /* the first record */ 889 atomic64_set(&user->seq, prb_first_valid_seq(prb)); 890 break; 891 case SEEK_DATA: 892 /* 893 * The first record after the last SYSLOG_ACTION_CLEAR, 894 * like issued by 'dmesg -c'. Reading /dev/kmsg itself 895 * changes no global state, and does not clear anything. 
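 *
 * For example, after a 'dmesg -c' the position set here skips the
 * cleared records, whereas SEEK_SET (handled above) still rewinds to
 * the oldest record physically remaining in the ring buffer.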
896 */ 897 atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq)); 898 break; 899 case SEEK_END: 900 /* after the last record */ 901 atomic64_set(&user->seq, prb_next_seq(prb)); 902 break; 903 default: 904 ret = -EINVAL; 905 } 906 return ret; 907 } 908 909 static __poll_t devkmsg_poll(struct file *file, poll_table *wait) 910 { 911 struct devkmsg_user *user = file->private_data; 912 struct printk_info info; 913 __poll_t ret = 0; 914 915 if (!user) 916 return EPOLLERR|EPOLLNVAL; 917 918 poll_wait(file, &log_wait, wait); 919 920 if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { 921 /* return error when data has vanished underneath us */ 922 if (info.seq != atomic64_read(&user->seq)) 923 ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; 924 else 925 ret = EPOLLIN|EPOLLRDNORM; 926 } 927 928 return ret; 929 } 930 931 static int devkmsg_open(struct inode *inode, struct file *file) 932 { 933 struct devkmsg_user *user; 934 int err; 935 936 if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) 937 return -EPERM; 938 939 /* write-only does not need any file context */ 940 if ((file->f_flags & O_ACCMODE) != O_WRONLY) { 941 err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL, 942 SYSLOG_FROM_READER); 943 if (err) 944 return err; 945 } 946 947 user = kvmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); 948 if (!user) 949 return -ENOMEM; 950 951 ratelimit_default_init(&user->rs); 952 ratelimit_set_flags(&user->rs, RATELIMIT_MSG_ON_RELEASE); 953 954 mutex_init(&user->lock); 955 956 prb_rec_init_rd(&user->record, &user->info, 957 &user->text_buf[0], sizeof(user->text_buf)); 958 959 atomic64_set(&user->seq, prb_first_valid_seq(prb)); 960 961 file->private_data = user; 962 return 0; 963 } 964 965 static int devkmsg_release(struct inode *inode, struct file *file) 966 { 967 struct devkmsg_user *user = file->private_data; 968 969 if (!user) 970 return 0; 971 972 ratelimit_state_exit(&user->rs); 973 974 mutex_destroy(&user->lock); 975 kvfree(user); 976 return 0; 977 } 978 979 const struct file_operations kmsg_fops = { 980 .open = devkmsg_open, 981 .read = devkmsg_read, 982 .write_iter = devkmsg_write, 983 .llseek = devkmsg_llseek, 984 .poll = devkmsg_poll, 985 .release = devkmsg_release, 986 }; 987 988 #ifdef CONFIG_CRASH_CORE 989 /* 990 * This appends the listed symbols to /proc/vmcore 991 * 992 * /proc/vmcore is used by various utilities, like crash and makedumpfile to 993 * obtain access to symbols that are otherwise very difficult to locate. These 994 * symbols are specifically used so that utilities can access and extract the 995 * dmesg log from a vmcore file after a crash. 996 */ 997 void log_buf_vmcoreinfo_setup(void) 998 { 999 struct dev_printk_info *dev_info = NULL; 1000 1001 VMCOREINFO_SYMBOL(prb); 1002 VMCOREINFO_SYMBOL(printk_rb_static); 1003 VMCOREINFO_SYMBOL(clear_seq); 1004 1005 /* 1006 * Export struct size and field offsets. User space tools can 1007 * parse it and detect any changes to structure down the line. 
1008 */ 1009 1010 VMCOREINFO_STRUCT_SIZE(printk_ringbuffer); 1011 VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring); 1012 VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring); 1013 VMCOREINFO_OFFSET(printk_ringbuffer, fail); 1014 1015 VMCOREINFO_STRUCT_SIZE(prb_desc_ring); 1016 VMCOREINFO_OFFSET(prb_desc_ring, count_bits); 1017 VMCOREINFO_OFFSET(prb_desc_ring, descs); 1018 VMCOREINFO_OFFSET(prb_desc_ring, infos); 1019 VMCOREINFO_OFFSET(prb_desc_ring, head_id); 1020 VMCOREINFO_OFFSET(prb_desc_ring, tail_id); 1021 1022 VMCOREINFO_STRUCT_SIZE(prb_desc); 1023 VMCOREINFO_OFFSET(prb_desc, state_var); 1024 VMCOREINFO_OFFSET(prb_desc, text_blk_lpos); 1025 1026 VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos); 1027 VMCOREINFO_OFFSET(prb_data_blk_lpos, begin); 1028 VMCOREINFO_OFFSET(prb_data_blk_lpos, next); 1029 1030 VMCOREINFO_STRUCT_SIZE(printk_info); 1031 VMCOREINFO_OFFSET(printk_info, seq); 1032 VMCOREINFO_OFFSET(printk_info, ts_nsec); 1033 VMCOREINFO_OFFSET(printk_info, text_len); 1034 VMCOREINFO_OFFSET(printk_info, caller_id); 1035 VMCOREINFO_OFFSET(printk_info, dev_info); 1036 1037 VMCOREINFO_STRUCT_SIZE(dev_printk_info); 1038 VMCOREINFO_OFFSET(dev_printk_info, subsystem); 1039 VMCOREINFO_LENGTH(printk_info_subsystem, sizeof(dev_info->subsystem)); 1040 VMCOREINFO_OFFSET(dev_printk_info, device); 1041 VMCOREINFO_LENGTH(printk_info_device, sizeof(dev_info->device)); 1042 1043 VMCOREINFO_STRUCT_SIZE(prb_data_ring); 1044 VMCOREINFO_OFFSET(prb_data_ring, size_bits); 1045 VMCOREINFO_OFFSET(prb_data_ring, data); 1046 VMCOREINFO_OFFSET(prb_data_ring, head_lpos); 1047 VMCOREINFO_OFFSET(prb_data_ring, tail_lpos); 1048 1049 VMCOREINFO_SIZE(atomic_long_t); 1050 VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter); 1051 1052 VMCOREINFO_STRUCT_SIZE(latched_seq); 1053 VMCOREINFO_OFFSET(latched_seq, val); 1054 } 1055 #endif 1056 1057 /* requested log_buf_len from kernel cmdline */ 1058 static unsigned long __initdata new_log_buf_len; 1059 1060 /* we practice scaling the ring buffer by powers of 2 */ 1061 static void __init log_buf_len_update(u64 size) 1062 { 1063 if (size > (u64)LOG_BUF_LEN_MAX) { 1064 size = (u64)LOG_BUF_LEN_MAX; 1065 pr_err("log_buf over 2G is not supported.\n"); 1066 } 1067 1068 if (size) 1069 size = roundup_pow_of_two(size); 1070 if (size > log_buf_len) 1071 new_log_buf_len = (unsigned long)size; 1072 } 1073 1074 /* save requested log_buf_len since it's too early to process it */ 1075 static int __init log_buf_len_setup(char *str) 1076 { 1077 u64 size; 1078 1079 if (!str) 1080 return -EINVAL; 1081 1082 size = memparse(str, &str); 1083 1084 log_buf_len_update(size); 1085 1086 return 0; 1087 } 1088 early_param("log_buf_len", log_buf_len_setup); 1089 1090 #ifdef CONFIG_SMP 1091 #define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT) 1092 1093 static void __init log_buf_add_cpu(void) 1094 { 1095 unsigned int cpu_extra; 1096 1097 /* 1098 * archs should set up cpu_possible_bits properly with 1099 * set_cpu_possible() after setup_arch() but just in 1100 * case lets ensure this is valid. 
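 *
 * Illustrative sizing (assumed config values): with a 128 KiB static
 * buffer and a 4 KiB per-CPU budget, 128 possible CPUs give
 * cpu_extra = 127 * 4 KiB ~= 508 KiB, well over half of the static
 * buffer, so the total is bumped to cpu_extra + __LOG_BUF_LEN.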
1101 */ 1102 if (num_possible_cpus() == 1) 1103 return; 1104 1105 cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN; 1106 1107 /* by default this will only continue through for large > 64 CPUs */ 1108 if (cpu_extra <= __LOG_BUF_LEN / 2) 1109 return; 1110 1111 pr_info("log_buf_len individual max cpu contribution: %d bytes\n", 1112 __LOG_CPU_MAX_BUF_LEN); 1113 pr_info("log_buf_len total cpu_extra contributions: %d bytes\n", 1114 cpu_extra); 1115 pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN); 1116 1117 log_buf_len_update(cpu_extra + __LOG_BUF_LEN); 1118 } 1119 #else /* !CONFIG_SMP */ 1120 static inline void log_buf_add_cpu(void) {} 1121 #endif /* CONFIG_SMP */ 1122 1123 static void __init set_percpu_data_ready(void) 1124 { 1125 __printk_percpu_data_ready = true; 1126 } 1127 1128 static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, 1129 struct printk_record *r) 1130 { 1131 struct prb_reserved_entry e; 1132 struct printk_record dest_r; 1133 1134 prb_rec_init_wr(&dest_r, r->info->text_len); 1135 1136 if (!prb_reserve(&e, rb, &dest_r)) 1137 return 0; 1138 1139 memcpy(&dest_r.text_buf[0], &r->text_buf[0], r->info->text_len); 1140 dest_r.info->text_len = r->info->text_len; 1141 dest_r.info->facility = r->info->facility; 1142 dest_r.info->level = r->info->level; 1143 dest_r.info->flags = r->info->flags; 1144 dest_r.info->ts_nsec = r->info->ts_nsec; 1145 dest_r.info->caller_id = r->info->caller_id; 1146 memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info)); 1147 1148 prb_final_commit(&e); 1149 1150 return prb_record_text_space(&e); 1151 } 1152 1153 static char setup_text_buf[LOG_LINE_MAX] __initdata; 1154 1155 void __init setup_log_buf(int early) 1156 { 1157 struct printk_info *new_infos; 1158 unsigned int new_descs_count; 1159 struct prb_desc *new_descs; 1160 struct printk_info info; 1161 struct printk_record r; 1162 unsigned int text_size; 1163 size_t new_descs_size; 1164 size_t new_infos_size; 1165 unsigned long flags; 1166 char *new_log_buf; 1167 unsigned int free; 1168 u64 seq; 1169 1170 /* 1171 * Some archs call setup_log_buf() multiple times - first is very 1172 * early, e.g. from setup_arch(), and second - when percpu_areas 1173 * are initialised. 
1174 */ 1175 if (!early) 1176 set_percpu_data_ready(); 1177 1178 if (log_buf != __log_buf) 1179 return; 1180 1181 if (!early && !new_log_buf_len) 1182 log_buf_add_cpu(); 1183 1184 if (!new_log_buf_len) 1185 return; 1186 1187 new_descs_count = new_log_buf_len >> PRB_AVGBITS; 1188 if (new_descs_count == 0) { 1189 pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len); 1190 return; 1191 } 1192 1193 new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); 1194 if (unlikely(!new_log_buf)) { 1195 pr_err("log_buf_len: %lu text bytes not available\n", 1196 new_log_buf_len); 1197 return; 1198 } 1199 1200 new_descs_size = new_descs_count * sizeof(struct prb_desc); 1201 new_descs = memblock_alloc(new_descs_size, LOG_ALIGN); 1202 if (unlikely(!new_descs)) { 1203 pr_err("log_buf_len: %zu desc bytes not available\n", 1204 new_descs_size); 1205 goto err_free_log_buf; 1206 } 1207 1208 new_infos_size = new_descs_count * sizeof(struct printk_info); 1209 new_infos = memblock_alloc(new_infos_size, LOG_ALIGN); 1210 if (unlikely(!new_infos)) { 1211 pr_err("log_buf_len: %zu info bytes not available\n", 1212 new_infos_size); 1213 goto err_free_descs; 1214 } 1215 1216 prb_rec_init_rd(&r, &info, &setup_text_buf[0], sizeof(setup_text_buf)); 1217 1218 prb_init(&printk_rb_dynamic, 1219 new_log_buf, ilog2(new_log_buf_len), 1220 new_descs, ilog2(new_descs_count), 1221 new_infos); 1222 1223 local_irq_save(flags); 1224 1225 log_buf_len = new_log_buf_len; 1226 log_buf = new_log_buf; 1227 new_log_buf_len = 0; 1228 1229 free = __LOG_BUF_LEN; 1230 prb_for_each_record(0, &printk_rb_static, seq, &r) { 1231 text_size = add_to_rb(&printk_rb_dynamic, &r); 1232 if (text_size > free) 1233 free = 0; 1234 else 1235 free -= text_size; 1236 } 1237 1238 prb = &printk_rb_dynamic; 1239 1240 local_irq_restore(flags); 1241 1242 /* 1243 * Copy any remaining messages that might have appeared from 1244 * NMI context after copying but before switching to the 1245 * dynamic buffer. 1246 */ 1247 prb_for_each_record(seq, &printk_rb_static, seq, &r) { 1248 text_size = add_to_rb(&printk_rb_dynamic, &r); 1249 if (text_size > free) 1250 free = 0; 1251 else 1252 free -= text_size; 1253 } 1254 1255 if (seq != prb_next_seq(&printk_rb_static)) { 1256 pr_err("dropped %llu messages\n", 1257 prb_next_seq(&printk_rb_static) - seq); 1258 } 1259 1260 pr_info("log_buf_len: %u bytes\n", log_buf_len); 1261 pr_info("early log buf free: %u(%u%%)\n", 1262 free, (free * 100) / __LOG_BUF_LEN); 1263 return; 1264 1265 err_free_descs: 1266 memblock_free(new_descs, new_descs_size); 1267 err_free_log_buf: 1268 memblock_free(new_log_buf, new_log_buf_len); 1269 } 1270 1271 static bool __read_mostly ignore_loglevel; 1272 1273 static int __init ignore_loglevel_setup(char *str) 1274 { 1275 ignore_loglevel = true; 1276 pr_info("debug: ignoring loglevel setting.\n"); 1277 1278 return 0; 1279 } 1280 1281 early_param("ignore_loglevel", ignore_loglevel_setup); 1282 module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); 1283 MODULE_PARM_DESC(ignore_loglevel, 1284 "ignore loglevel setting (prints all kernel messages to the console)"); 1285 1286 static bool suppress_message_printing(int level) 1287 { 1288 return (level >= console_loglevel && !ignore_loglevel); 1289 } 1290 1291 #ifdef CONFIG_BOOT_PRINTK_DELAY 1292 1293 static int boot_delay; /* msecs delay after each printk during bootup */ 1294 static unsigned long long loops_per_msec; /* based on boot_delay */ 1295 1296 static int __init boot_delay_setup(char *str) 1297 { 1298 unsigned long lpj; 1299 1300 lpj = preset_lpj ? 
preset_lpj : 1000000; /* some guess */ 1301 loops_per_msec = (unsigned long long)lpj / 1000 * HZ; 1302 1303 get_option(&str, &boot_delay); 1304 if (boot_delay > 10 * 1000) 1305 boot_delay = 0; 1306 1307 pr_debug("boot_delay: %u, preset_lpj: %ld, lpj: %lu, " 1308 "HZ: %d, loops_per_msec: %llu\n", 1309 boot_delay, preset_lpj, lpj, HZ, loops_per_msec); 1310 return 0; 1311 } 1312 early_param("boot_delay", boot_delay_setup); 1313 1314 static void boot_delay_msec(int level) 1315 { 1316 unsigned long long k; 1317 unsigned long timeout; 1318 1319 if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) 1320 || suppress_message_printing(level)) { 1321 return; 1322 } 1323 1324 k = (unsigned long long)loops_per_msec * boot_delay; 1325 1326 timeout = jiffies + msecs_to_jiffies(boot_delay); 1327 while (k) { 1328 k--; 1329 cpu_relax(); 1330 /* 1331 * use (volatile) jiffies to prevent 1332 * compiler reduction; loop termination via jiffies 1333 * is secondary and may or may not happen. 1334 */ 1335 if (time_after(jiffies, timeout)) 1336 break; 1337 touch_nmi_watchdog(); 1338 } 1339 } 1340 #else 1341 static inline void boot_delay_msec(int level) 1342 { 1343 } 1344 #endif 1345 1346 static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME); 1347 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); 1348 1349 static size_t print_syslog(unsigned int level, char *buf) 1350 { 1351 return sprintf(buf, "<%u>", level); 1352 } 1353 1354 static size_t print_time(u64 ts, char *buf) 1355 { 1356 unsigned long rem_nsec = do_div(ts, 1000000000); 1357 1358 return sprintf(buf, "[%5lu.%06lu]", 1359 (unsigned long)ts, rem_nsec / 1000); 1360 } 1361 1362 #ifdef CONFIG_PRINTK_CALLER 1363 static size_t print_caller(u32 id, char *buf) 1364 { 1365 char caller[12]; 1366 1367 snprintf(caller, sizeof(caller), "%c%u", 1368 id & 0x80000000 ? 'C' : 'T', id & ~0x80000000); 1369 return sprintf(buf, "[%6s]", caller); 1370 } 1371 #else 1372 #define print_caller(id, buf) 0 1373 #endif 1374 1375 static size_t info_print_prefix(const struct printk_info *info, bool syslog, 1376 bool time, char *buf) 1377 { 1378 size_t len = 0; 1379 1380 if (syslog) 1381 len = print_syslog((info->facility << 3) | info->level, buf); 1382 1383 if (time) 1384 len += print_time(info->ts_nsec, buf + len); 1385 1386 len += print_caller(info->caller_id, buf + len); 1387 1388 if (IS_ENABLED(CONFIG_PRINTK_CALLER) || time) { 1389 buf[len++] = ' '; 1390 buf[len] = '\0'; 1391 } 1392 1393 return len; 1394 } 1395 1396 /* 1397 * Prepare the record for printing. The text is shifted within the given 1398 * buffer to avoid a need for another one. The following operations are 1399 * done: 1400 * 1401 * - Add prefix for each line. 1402 * - Drop truncated lines that no longer fit into the buffer. 1403 * - Add the trailing newline that has been removed in vprintk_store(). 1404 * - Add a string terminator. 1405 * 1406 * Since the produced string is always terminated, the maximum possible 1407 * return value is @r->text_buf_size - 1; 1408 * 1409 * Return: The length of the updated/prepared text, including the added 1410 * prefixes and the newline. The terminator is not counted. The dropped 1411 * line(s) are not counted. 
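 *
 * For example (illustrative record), the text "first\nsecond" with the
 * prefix "[    1.234567] " becomes
 * "[    1.234567] first\n[    1.234567] second\n" plus the terminator.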
1412 */ 1413 static size_t record_print_text(struct printk_record *r, bool syslog, 1414 bool time) 1415 { 1416 size_t text_len = r->info->text_len; 1417 size_t buf_size = r->text_buf_size; 1418 char *text = r->text_buf; 1419 char prefix[PREFIX_MAX]; 1420 bool truncated = false; 1421 size_t prefix_len; 1422 size_t line_len; 1423 size_t len = 0; 1424 char *next; 1425 1426 /* 1427 * If the message was truncated because the buffer was not large 1428 * enough, treat the available text as if it were the full text. 1429 */ 1430 if (text_len > buf_size) 1431 text_len = buf_size; 1432 1433 prefix_len = info_print_prefix(r->info, syslog, time, prefix); 1434 1435 /* 1436 * @text_len: bytes of unprocessed text 1437 * @line_len: bytes of current line _without_ newline 1438 * @text: pointer to beginning of current line 1439 * @len: number of bytes prepared in r->text_buf 1440 */ 1441 for (;;) { 1442 next = memchr(text, '\n', text_len); 1443 if (next) { 1444 line_len = next - text; 1445 } else { 1446 /* Drop truncated line(s). */ 1447 if (truncated) 1448 break; 1449 line_len = text_len; 1450 } 1451 1452 /* 1453 * Truncate the text if there is not enough space to add the 1454 * prefix and a trailing newline and a terminator. 1455 */ 1456 if (len + prefix_len + text_len + 1 + 1 > buf_size) { 1457 /* Drop even the current line if no space. */ 1458 if (len + prefix_len + line_len + 1 + 1 > buf_size) 1459 break; 1460 1461 text_len = buf_size - len - prefix_len - 1 - 1; 1462 truncated = true; 1463 } 1464 1465 memmove(text + prefix_len, text, text_len); 1466 memcpy(text, prefix, prefix_len); 1467 1468 /* 1469 * Increment the prepared length to include the text and 1470 * prefix that were just moved+copied. Also increment for the 1471 * newline at the end of this line. If this is the last line, 1472 * there is no newline, but it will be added immediately below. 1473 */ 1474 len += prefix_len + line_len + 1; 1475 if (text_len == line_len) { 1476 /* 1477 * This is the last line. Add the trailing newline 1478 * removed in vprintk_store(). 1479 */ 1480 text[prefix_len + line_len] = '\n'; 1481 break; 1482 } 1483 1484 /* 1485 * Advance beyond the added prefix and the related line with 1486 * its newline. 1487 */ 1488 text += prefix_len + line_len + 1; 1489 1490 /* 1491 * The remaining text has only decreased by the line with its 1492 * newline. 1493 * 1494 * Note that @text_len can become zero. It happens when @text 1495 * ended with a newline (either due to truncation or the 1496 * original string ending with "\n\n"). The loop is correctly 1497 * repeated and (if not truncated) an empty line with a prefix 1498 * will be prepared. 1499 */ 1500 text_len -= line_len + 1; 1501 } 1502 1503 /* 1504 * If a buffer was provided, it will be terminated. Space for the 1505 * string terminator is guaranteed to be available. The terminator is 1506 * not counted in the return value. 1507 */ 1508 if (buf_size > 0) 1509 r->text_buf[len] = 0; 1510 1511 return len; 1512 } 1513 1514 static size_t get_record_print_text_size(struct printk_info *info, 1515 unsigned int line_count, 1516 bool syslog, bool time) 1517 { 1518 char prefix[PREFIX_MAX]; 1519 size_t prefix_len; 1520 1521 prefix_len = info_print_prefix(info, syslog, time, prefix); 1522 1523 /* 1524 * Each line will be preceded with a prefix. The intermediate 1525 * newlines are already within the text, but a final trailing 1526 * newline will be added. 
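 *
 * For example (illustrative values): a two-line record with 20 bytes of
 * text and an 11-byte prefix per line sizes to 2 * 11 + 20 + 1 = 43 bytes.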
1527 */ 1528 return ((prefix_len * line_count) + info->text_len + 1); 1529 } 1530 1531 /* 1532 * Beginning with @start_seq, find the first record where it and all following 1533 * records up to (but not including) @max_seq fit into @size. 1534 * 1535 * @max_seq is simply an upper bound and does not need to exist. If the caller 1536 * does not require an upper bound, -1 can be used for @max_seq. 1537 */ 1538 static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size, 1539 bool syslog, bool time) 1540 { 1541 struct printk_info info; 1542 unsigned int line_count; 1543 size_t len = 0; 1544 u64 seq; 1545 1546 /* Determine the size of the records up to @max_seq. */ 1547 prb_for_each_info(start_seq, prb, seq, &info, &line_count) { 1548 if (info.seq >= max_seq) 1549 break; 1550 len += get_record_print_text_size(&info, line_count, syslog, time); 1551 } 1552 1553 /* 1554 * Adjust the upper bound for the next loop to avoid subtracting 1555 * lengths that were never added. 1556 */ 1557 if (seq < max_seq) 1558 max_seq = seq; 1559 1560 /* 1561 * Move first record forward until length fits into the buffer. Ignore 1562 * newest messages that were not counted in the above cycle. Messages 1563 * might appear and get lost in the meantime. This is a best effort 1564 * that prevents an infinite loop that could occur with a retry. 1565 */ 1566 prb_for_each_info(start_seq, prb, seq, &info, &line_count) { 1567 if (len <= size || info.seq >= max_seq) 1568 break; 1569 len -= get_record_print_text_size(&info, line_count, syslog, time); 1570 } 1571 1572 return seq; 1573 } 1574 1575 /* The caller is responsible for making sure @size is greater than 0. */ 1576 static int syslog_print(char __user *buf, int size) 1577 { 1578 struct printk_info info; 1579 struct printk_record r; 1580 char *text; 1581 int len = 0; 1582 u64 seq; 1583 1584 text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); 1585 if (!text) 1586 return -ENOMEM; 1587 1588 prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); 1589 1590 mutex_lock(&syslog_lock); 1591 1592 /* 1593 * Wait for the @syslog_seq record to be available. @syslog_seq may 1594 * change while waiting. 1595 */ 1596 do { 1597 seq = syslog_seq; 1598 1599 mutex_unlock(&syslog_lock); 1600 /* 1601 * Guarantee this task is visible on the waitqueue before 1602 * checking the wake condition. 1603 * 1604 * The full memory barrier within set_current_state() of 1605 * prepare_to_wait_event() pairs with the full memory barrier 1606 * within wq_has_sleeper(). 1607 * 1608 * This pairs with __wake_up_klogd:A. 1609 */ 1610 len = wait_event_interruptible(log_wait, 1611 prb_read_valid(prb, seq, NULL)); /* LMM(syslog_print:A) */ 1612 mutex_lock(&syslog_lock); 1613 1614 if (len) 1615 goto out; 1616 } while (syslog_seq != seq); 1617 1618 /* 1619 * Copy records that fit into the buffer. The above cycle makes sure 1620 * that the first record is always available. 1621 */ 1622 do { 1623 size_t n; 1624 size_t skip; 1625 int err; 1626 1627 if (!prb_read_valid(prb, syslog_seq, &r)) 1628 break; 1629 1630 if (r.info->seq != syslog_seq) { 1631 /* message is gone, move to next valid one */ 1632 syslog_seq = r.info->seq; 1633 syslog_partial = 0; 1634 } 1635 1636 /* 1637 * To keep reading/counting partial line consistent, 1638 * use printk_time value as of the beginning of a line. 
1639 */ 1640 if (!syslog_partial) 1641 syslog_time = printk_time; 1642 1643 skip = syslog_partial; 1644 n = record_print_text(&r, true, syslog_time); 1645 if (n - syslog_partial <= size) { 1646 /* message fits into buffer, move forward */ 1647 syslog_seq = r.info->seq + 1; 1648 n -= syslog_partial; 1649 syslog_partial = 0; 1650 } else if (!len){ 1651 /* partial read(), remember position */ 1652 n = size; 1653 syslog_partial += n; 1654 } else 1655 n = 0; 1656 1657 if (!n) 1658 break; 1659 1660 mutex_unlock(&syslog_lock); 1661 err = copy_to_user(buf, text + skip, n); 1662 mutex_lock(&syslog_lock); 1663 1664 if (err) { 1665 if (!len) 1666 len = -EFAULT; 1667 break; 1668 } 1669 1670 len += n; 1671 size -= n; 1672 buf += n; 1673 } while (size); 1674 out: 1675 mutex_unlock(&syslog_lock); 1676 kfree(text); 1677 return len; 1678 } 1679 1680 static int syslog_print_all(char __user *buf, int size, bool clear) 1681 { 1682 struct printk_info info; 1683 struct printk_record r; 1684 char *text; 1685 int len = 0; 1686 u64 seq; 1687 bool time; 1688 1689 text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); 1690 if (!text) 1691 return -ENOMEM; 1692 1693 time = printk_time; 1694 /* 1695 * Find first record that fits, including all following records, 1696 * into the user-provided buffer for this dump. 1697 */ 1698 seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1, 1699 size, true, time); 1700 1701 prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); 1702 1703 len = 0; 1704 prb_for_each_record(seq, prb, seq, &r) { 1705 int textlen; 1706 1707 textlen = record_print_text(&r, true, time); 1708 1709 if (len + textlen > size) { 1710 seq--; 1711 break; 1712 } 1713 1714 if (copy_to_user(buf + len, text, textlen)) 1715 len = -EFAULT; 1716 else 1717 len += textlen; 1718 1719 if (len < 0) 1720 break; 1721 } 1722 1723 if (clear) { 1724 mutex_lock(&syslog_lock); 1725 latched_seq_write(&clear_seq, seq); 1726 mutex_unlock(&syslog_lock); 1727 } 1728 1729 kfree(text); 1730 return len; 1731 } 1732 1733 static void syslog_clear(void) 1734 { 1735 mutex_lock(&syslog_lock); 1736 latched_seq_write(&clear_seq, prb_next_seq(prb)); 1737 mutex_unlock(&syslog_lock); 1738 } 1739 1740 int do_syslog(int type, char __user *buf, int len, int source) 1741 { 1742 struct printk_info info; 1743 bool clear = false; 1744 static int saved_console_loglevel = LOGLEVEL_DEFAULT; 1745 int error; 1746 1747 error = check_syslog_permissions(type, source); 1748 if (error) 1749 return error; 1750 1751 switch (type) { 1752 case SYSLOG_ACTION_CLOSE: /* Close log */ 1753 break; 1754 case SYSLOG_ACTION_OPEN: /* Open log */ 1755 break; 1756 case SYSLOG_ACTION_READ: /* Read from log */ 1757 if (!buf || len < 0) 1758 return -EINVAL; 1759 if (!len) 1760 return 0; 1761 if (!access_ok(buf, len)) 1762 return -EFAULT; 1763 error = syslog_print(buf, len); 1764 break; 1765 /* Read/clear last kernel messages */ 1766 case SYSLOG_ACTION_READ_CLEAR: 1767 clear = true; 1768 fallthrough; 1769 /* Read last kernel messages */ 1770 case SYSLOG_ACTION_READ_ALL: 1771 if (!buf || len < 0) 1772 return -EINVAL; 1773 if (!len) 1774 return 0; 1775 if (!access_ok(buf, len)) 1776 return -EFAULT; 1777 error = syslog_print_all(buf, len, clear); 1778 break; 1779 /* Clear ring buffer */ 1780 case SYSLOG_ACTION_CLEAR: 1781 syslog_clear(); 1782 break; 1783 /* Disable logging to console */ 1784 case SYSLOG_ACTION_CONSOLE_OFF: 1785 if (saved_console_loglevel == LOGLEVEL_DEFAULT) 1786 saved_console_loglevel = console_loglevel; 1787 console_loglevel = minimum_console_loglevel; 1788 break; 
1789 /* Enable logging to console */ 1790 case SYSLOG_ACTION_CONSOLE_ON: 1791 if (saved_console_loglevel != LOGLEVEL_DEFAULT) { 1792 console_loglevel = saved_console_loglevel; 1793 saved_console_loglevel = LOGLEVEL_DEFAULT; 1794 } 1795 break; 1796 /* Set level of messages printed to console */ 1797 case SYSLOG_ACTION_CONSOLE_LEVEL: 1798 if (len < 1 || len > 8) 1799 return -EINVAL; 1800 if (len < minimum_console_loglevel) 1801 len = minimum_console_loglevel; 1802 console_loglevel = len; 1803 /* Implicitly re-enable logging to console */ 1804 saved_console_loglevel = LOGLEVEL_DEFAULT; 1805 break; 1806 /* Number of chars in the log buffer */ 1807 case SYSLOG_ACTION_SIZE_UNREAD: 1808 mutex_lock(&syslog_lock); 1809 if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { 1810 /* No unread messages. */ 1811 mutex_unlock(&syslog_lock); 1812 return 0; 1813 } 1814 if (info.seq != syslog_seq) { 1815 /* messages are gone, move to first one */ 1816 syslog_seq = info.seq; 1817 syslog_partial = 0; 1818 } 1819 if (source == SYSLOG_FROM_PROC) { 1820 /* 1821 * Short-cut for poll(/"proc/kmsg") which simply checks 1822 * for pending data, not the size; return the count of 1823 * records, not the length. 1824 */ 1825 error = prb_next_seq(prb) - syslog_seq; 1826 } else { 1827 bool time = syslog_partial ? syslog_time : printk_time; 1828 unsigned int line_count; 1829 u64 seq; 1830 1831 prb_for_each_info(syslog_seq, prb, seq, &info, 1832 &line_count) { 1833 error += get_record_print_text_size(&info, line_count, 1834 true, time); 1835 time = printk_time; 1836 } 1837 error -= syslog_partial; 1838 } 1839 mutex_unlock(&syslog_lock); 1840 break; 1841 /* Size of the log buffer */ 1842 case SYSLOG_ACTION_SIZE_BUFFER: 1843 error = log_buf_len; 1844 break; 1845 default: 1846 error = -EINVAL; 1847 break; 1848 } 1849 1850 return error; 1851 } 1852 1853 SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) 1854 { 1855 return do_syslog(type, buf, len, SYSLOG_FROM_READER); 1856 } 1857 1858 /* 1859 * Special console_lock variants that help to reduce the risk of soft-lockups. 1860 * They allow to pass console_lock to another printk() call using a busy wait. 1861 */ 1862 1863 #ifdef CONFIG_LOCKDEP 1864 static struct lockdep_map console_owner_dep_map = { 1865 .name = "console_owner" 1866 }; 1867 #endif 1868 1869 static DEFINE_RAW_SPINLOCK(console_owner_lock); 1870 static struct task_struct *console_owner; 1871 static bool console_waiter; 1872 1873 /** 1874 * console_lock_spinning_enable - mark beginning of code where another 1875 * thread might safely busy wait 1876 * 1877 * This basically converts console_lock into a spinlock. This marks 1878 * the section where the console_lock owner can not sleep, because 1879 * there may be a waiter spinning (like a spinlock). Also it must be 1880 * ready to hand over the lock at the end of the section. 1881 */ 1882 static void console_lock_spinning_enable(void) 1883 { 1884 raw_spin_lock(&console_owner_lock); 1885 console_owner = current; 1886 raw_spin_unlock(&console_owner_lock); 1887 1888 /* The waiter may spin on us after setting console_owner */ 1889 spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); 1890 } 1891 1892 /** 1893 * console_lock_spinning_disable_and_check - mark end of code where another 1894 * thread was able to busy wait and check if there is a waiter 1895 * @cookie: cookie returned from console_srcu_read_lock() 1896 * 1897 * This is called at the end of the section where spinning is allowed. 1898 * It has two functions. 
First, it is a signal that it is no longer
 * safe to start busy waiting for the lock. Second, it checks if
 * there is a busy waiter and passes the lock rights to it.
 *
 * Important: Callers lose both the console_lock and the SRCU read lock if
 * there was a busy waiter. They must not touch items synchronized by
 * console_lock or SRCU read lock in this case.
 *
 * Return: 1 if the lock rights were passed, 0 otherwise.
 */
static int console_lock_spinning_disable_and_check(int cookie)
{
	int waiter;

	raw_spin_lock(&console_owner_lock);
	waiter = READ_ONCE(console_waiter);
	console_owner = NULL;
	raw_spin_unlock(&console_owner_lock);

	if (!waiter) {
		spin_release(&console_owner_dep_map, _THIS_IP_);
		return 0;
	}

	/* The waiter is now free to continue */
	WRITE_ONCE(console_waiter, false);

	spin_release(&console_owner_dep_map, _THIS_IP_);

	/*
	 * Preserve lockdep lock ordering. Release the SRCU read lock before
	 * releasing the console_lock.
	 */
	console_srcu_read_unlock(cookie);

	/*
	 * Hand off console_lock to waiter. The waiter will perform
	 * the up(). After this, the waiter is the console_lock owner.
	 */
	mutex_release(&console_lock_dep_map, _THIS_IP_);
	return 1;
}

/**
 * console_trylock_spinning - try to get console_lock by busy waiting
 *
 * This allows busy waiting for the console_lock when the current
 * owner is running in specially marked sections. It means that
 * the current owner is running and cannot reschedule until it
 * is ready to lose the lock.
 *
 * Return: 1 if we got the lock, 0 otherwise
 */
static int console_trylock_spinning(void)
{
	struct task_struct *owner = NULL;
	bool waiter;
	bool spin = false;
	unsigned long flags;

	if (console_trylock())
		return 1;

	/*
	 * It's unsafe to spin once a panic has begun. If we are the
	 * panic CPU, we may have already halted the owner of the
	 * console_sem. If we are not the panic CPU, then we should
	 * avoid taking console_sem, so the panic CPU has a better
	 * chance of cleanly acquiring it later.
	 */
	if (panic_in_progress())
		return 0;

	printk_safe_enter_irqsave(flags);

	raw_spin_lock(&console_owner_lock);
	owner = READ_ONCE(console_owner);
	waiter = READ_ONCE(console_waiter);
	if (!waiter && owner && owner != current) {
		WRITE_ONCE(console_waiter, true);
		spin = true;
	}
	raw_spin_unlock(&console_owner_lock);

	/*
	 * If there is an active printk() writing to the
	 * consoles, instead of having it write our data too,
	 * see if we can offload that load from the active
	 * printer, and do some printing ourselves.
	 * Go into a spin only if there isn't already a waiter
	 * spinning, and there is an active printer, and
	 * that active printer isn't us (recursive printk?).
	 */
	if (!spin) {
		printk_safe_exit_irqrestore(flags);
		return 0;
	}

	/* We spin waiting for the owner to release us */
	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
	/* Owner will clear console_waiter on hand off */
	while (READ_ONCE(console_waiter))
		cpu_relax();
	spin_release(&console_owner_dep_map, _THIS_IP_);

	printk_safe_exit_irqrestore(flags);
	/*
	 * The owner passed the console lock to us.
2006 * Since we did not spin on console lock, annotate 2007 * this as a trylock. Otherwise lockdep will 2008 * complain. 2009 */ 2010 mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); 2011 2012 return 1; 2013 } 2014 2015 /* 2016 * Call the specified console driver, asking it to write out the specified 2017 * text and length. If @dropped_text is non-NULL and any records have been 2018 * dropped, a dropped message will be written out first. 2019 */ 2020 static void call_console_driver(struct console *con, const char *text, size_t len, 2021 char *dropped_text) 2022 { 2023 size_t dropped_len; 2024 2025 if (con->dropped && dropped_text) { 2026 dropped_len = snprintf(dropped_text, DROPPED_TEXT_MAX, 2027 "** %lu printk messages dropped **\n", 2028 con->dropped); 2029 con->dropped = 0; 2030 con->write(con, dropped_text, dropped_len); 2031 } 2032 2033 con->write(con, text, len); 2034 } 2035 2036 /* 2037 * Recursion is tracked separately on each CPU. If NMIs are supported, an 2038 * additional NMI context per CPU is also separately tracked. Until per-CPU 2039 * is available, a separate "early tracking" is performed. 2040 */ 2041 static DEFINE_PER_CPU(u8, printk_count); 2042 static u8 printk_count_early; 2043 #ifdef CONFIG_HAVE_NMI 2044 static DEFINE_PER_CPU(u8, printk_count_nmi); 2045 static u8 printk_count_nmi_early; 2046 #endif 2047 2048 /* 2049 * Recursion is limited to keep the output sane. printk() should not require 2050 * more than 1 level of recursion (allowing, for example, printk() to trigger 2051 * a WARN), but a higher value is used in case some printk-internal errors 2052 * exist, such as the ringbuffer validation checks failing. 2053 */ 2054 #define PRINTK_MAX_RECURSION 3 2055 2056 /* 2057 * Return a pointer to the dedicated counter for the CPU+context of the 2058 * caller. 2059 */ 2060 static u8 *__printk_recursion_counter(void) 2061 { 2062 #ifdef CONFIG_HAVE_NMI 2063 if (in_nmi()) { 2064 if (printk_percpu_data_ready()) 2065 return this_cpu_ptr(&printk_count_nmi); 2066 return &printk_count_nmi_early; 2067 } 2068 #endif 2069 if (printk_percpu_data_ready()) 2070 return this_cpu_ptr(&printk_count); 2071 return &printk_count_early; 2072 } 2073 2074 /* 2075 * Enter recursion tracking. Interrupts are disabled to simplify tracking. 2076 * The caller must check the boolean return value to see if the recursion is 2077 * allowed. On failure, interrupts are not disabled. 2078 * 2079 * @recursion_ptr must be a variable of type (u8 *) and is the same variable 2080 * that is passed to printk_exit_irqrestore(). 2081 */ 2082 #define printk_enter_irqsave(recursion_ptr, flags) \ 2083 ({ \ 2084 bool success = true; \ 2085 \ 2086 typecheck(u8 *, recursion_ptr); \ 2087 local_irq_save(flags); \ 2088 (recursion_ptr) = __printk_recursion_counter(); \ 2089 if (*(recursion_ptr) > PRINTK_MAX_RECURSION) { \ 2090 local_irq_restore(flags); \ 2091 success = false; \ 2092 } else { \ 2093 (*(recursion_ptr))++; \ 2094 } \ 2095 success; \ 2096 }) 2097 2098 /* Exit recursion tracking, restoring interrupts. 
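 *
 * A minimal pairing sketch, mirroring how vprintk_store() below uses the
 * two macros (not an additional API; names as in that function):
 *
 *	u8 *recursion_ptr;
 *	unsigned long irqflags;
 *
 *	if (!printk_enter_irqsave(recursion_ptr, irqflags))
 *		return 0;
 *	... write the record to the ringbuffer ...
 *	printk_exit_irqrestore(recursion_ptr, irqflags);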
*/ 2099 #define printk_exit_irqrestore(recursion_ptr, flags) \ 2100 do { \ 2101 typecheck(u8 *, recursion_ptr); \ 2102 (*(recursion_ptr))--; \ 2103 local_irq_restore(flags); \ 2104 } while (0) 2105 2106 int printk_delay_msec __read_mostly; 2107 2108 static inline void printk_delay(int level) 2109 { 2110 boot_delay_msec(level); 2111 2112 if (unlikely(printk_delay_msec)) { 2113 int m = printk_delay_msec; 2114 2115 while (m--) { 2116 mdelay(1); 2117 touch_nmi_watchdog(); 2118 } 2119 } 2120 } 2121 2122 static inline u32 printk_caller_id(void) 2123 { 2124 return in_task() ? task_pid_nr(current) : 2125 0x80000000 + smp_processor_id(); 2126 } 2127 2128 /** 2129 * printk_parse_prefix - Parse level and control flags. 2130 * 2131 * @text: The terminated text message. 2132 * @level: A pointer to the current level value, will be updated. 2133 * @flags: A pointer to the current printk_info flags, will be updated. 2134 * 2135 * @level may be NULL if the caller is not interested in the parsed value. 2136 * Otherwise the variable pointed to by @level must be set to 2137 * LOGLEVEL_DEFAULT in order to be updated with the parsed value. 2138 * 2139 * @flags may be NULL if the caller is not interested in the parsed value. 2140 * Otherwise the variable pointed to by @flags will be OR'd with the parsed 2141 * value. 2142 * 2143 * Return: The length of the parsed level and control flags. 2144 */ 2145 u16 printk_parse_prefix(const char *text, int *level, 2146 enum printk_info_flags *flags) 2147 { 2148 u16 prefix_len = 0; 2149 int kern_level; 2150 2151 while (*text) { 2152 kern_level = printk_get_level(text); 2153 if (!kern_level) 2154 break; 2155 2156 switch (kern_level) { 2157 case '0' ... '7': 2158 if (level && *level == LOGLEVEL_DEFAULT) 2159 *level = kern_level - '0'; 2160 break; 2161 case 'c': /* KERN_CONT */ 2162 if (flags) 2163 *flags |= LOG_CONT; 2164 } 2165 2166 prefix_len += 2; 2167 text += 2; 2168 } 2169 2170 return prefix_len; 2171 } 2172 2173 __printf(5, 0) 2174 static u16 printk_sprint(char *text, u16 size, int facility, 2175 enum printk_info_flags *flags, const char *fmt, 2176 va_list args) 2177 { 2178 u16 text_len; 2179 2180 text_len = vscnprintf(text, size, fmt, args); 2181 2182 /* Mark and strip a trailing newline. */ 2183 if (text_len && text[text_len - 1] == '\n') { 2184 text_len--; 2185 *flags |= LOG_NEWLINE; 2186 } 2187 2188 /* Strip log level and control flags. */ 2189 if (facility == 0) { 2190 u16 prefix_len; 2191 2192 prefix_len = printk_parse_prefix(text, NULL, NULL); 2193 if (prefix_len) { 2194 text_len -= prefix_len; 2195 memmove(text, text + prefix_len, text_len); 2196 } 2197 } 2198 2199 trace_console_rcuidle(text, text_len); 2200 2201 return text_len; 2202 } 2203 2204 __printf(4, 0) 2205 int vprintk_store(int facility, int level, 2206 const struct dev_printk_info *dev_info, 2207 const char *fmt, va_list args) 2208 { 2209 struct prb_reserved_entry e; 2210 enum printk_info_flags flags = 0; 2211 struct printk_record r; 2212 unsigned long irqflags; 2213 u16 trunc_msg_len = 0; 2214 char prefix_buf[8]; 2215 u8 *recursion_ptr; 2216 u16 reserve_size; 2217 va_list args2; 2218 u32 caller_id; 2219 u16 text_len; 2220 int ret = 0; 2221 u64 ts_nsec; 2222 2223 if (!printk_enter_irqsave(recursion_ptr, irqflags)) 2224 return 0; 2225 2226 /* 2227 * Since the duration of printk() can vary depending on the message 2228 * and state of the ringbuffer, grab the timestamp now so that it is 2229 * close to the call of printk(). This provides a more deterministic 2230 * timestamp with respect to the caller. 
2231 */ 2232 ts_nsec = local_clock(); 2233 2234 caller_id = printk_caller_id(); 2235 2236 /* 2237 * The sprintf needs to come first since the syslog prefix might be 2238 * passed in as a parameter. An extra byte must be reserved so that 2239 * later the vscnprintf() into the reserved buffer has room for the 2240 * terminating '\0', which is not counted by vsnprintf(). 2241 */ 2242 va_copy(args2, args); 2243 reserve_size = vsnprintf(&prefix_buf[0], sizeof(prefix_buf), fmt, args2) + 1; 2244 va_end(args2); 2245 2246 if (reserve_size > LOG_LINE_MAX) 2247 reserve_size = LOG_LINE_MAX; 2248 2249 /* Extract log level or control flags. */ 2250 if (facility == 0) 2251 printk_parse_prefix(&prefix_buf[0], &level, &flags); 2252 2253 if (level == LOGLEVEL_DEFAULT) 2254 level = default_message_loglevel; 2255 2256 if (dev_info) 2257 flags |= LOG_NEWLINE; 2258 2259 if (flags & LOG_CONT) { 2260 prb_rec_init_wr(&r, reserve_size); 2261 if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { 2262 text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size, 2263 facility, &flags, fmt, args); 2264 r.info->text_len += text_len; 2265 2266 if (flags & LOG_NEWLINE) { 2267 r.info->flags |= LOG_NEWLINE; 2268 prb_final_commit(&e); 2269 } else { 2270 prb_commit(&e); 2271 } 2272 2273 ret = text_len; 2274 goto out; 2275 } 2276 } 2277 2278 /* 2279 * Explicitly initialize the record before every prb_reserve() call. 2280 * prb_reserve_in_last() and prb_reserve() purposely invalidate the 2281 * structure when they fail. 2282 */ 2283 prb_rec_init_wr(&r, reserve_size); 2284 if (!prb_reserve(&e, prb, &r)) { 2285 /* truncate the message if it is too long for empty buffer */ 2286 truncate_msg(&reserve_size, &trunc_msg_len); 2287 2288 prb_rec_init_wr(&r, reserve_size + trunc_msg_len); 2289 if (!prb_reserve(&e, prb, &r)) 2290 goto out; 2291 } 2292 2293 /* fill message */ 2294 text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args); 2295 if (trunc_msg_len) 2296 memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len); 2297 r.info->text_len = text_len + trunc_msg_len; 2298 r.info->facility = facility; 2299 r.info->level = level & 7; 2300 r.info->flags = flags & 0x1f; 2301 r.info->ts_nsec = ts_nsec; 2302 r.info->caller_id = caller_id; 2303 if (dev_info) 2304 memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); 2305 2306 /* A message without a trailing newline can be continued. */ 2307 if (!(flags & LOG_NEWLINE)) 2308 prb_commit(&e); 2309 else 2310 prb_final_commit(&e); 2311 2312 ret = text_len + trunc_msg_len; 2313 out: 2314 printk_exit_irqrestore(recursion_ptr, irqflags); 2315 return ret; 2316 } 2317 2318 asmlinkage int vprintk_emit(int facility, int level, 2319 const struct dev_printk_info *dev_info, 2320 const char *fmt, va_list args) 2321 { 2322 int printed_len; 2323 bool in_sched = false; 2324 2325 /* Suppress unimportant messages after panic happens */ 2326 if (unlikely(suppress_printk)) 2327 return 0; 2328 2329 if (unlikely(suppress_panic_printk) && 2330 atomic_read(&panic_cpu) != raw_smp_processor_id()) 2331 return 0; 2332 2333 if (level == LOGLEVEL_SCHED) { 2334 level = LOGLEVEL_DEFAULT; 2335 in_sched = true; 2336 } 2337 2338 printk_delay(level); 2339 2340 printed_len = vprintk_store(facility, level, dev_info, fmt, args); 2341 2342 /* If called from the scheduler, we can not call up(). */ 2343 if (!in_sched) { 2344 /* 2345 * The caller may be holding system-critical or 2346 * timing-sensitive locks. 
Disable preemption during 2347 * printing of all remaining records to all consoles so that 2348 * this context can return as soon as possible. Hopefully 2349 * another printk() caller will take over the printing. 2350 */ 2351 preempt_disable(); 2352 /* 2353 * Try to acquire and then immediately release the console 2354 * semaphore. The release will print out buffers. With the 2355 * spinning variant, this context tries to take over the 2356 * printing from another printing context. 2357 */ 2358 if (console_trylock_spinning()) 2359 console_unlock(); 2360 preempt_enable(); 2361 } 2362 2363 wake_up_klogd(); 2364 return printed_len; 2365 } 2366 EXPORT_SYMBOL(vprintk_emit); 2367 2368 int vprintk_default(const char *fmt, va_list args) 2369 { 2370 return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); 2371 } 2372 EXPORT_SYMBOL_GPL(vprintk_default); 2373 2374 asmlinkage __visible int _printk(const char *fmt, ...) 2375 { 2376 va_list args; 2377 int r; 2378 2379 va_start(args, fmt); 2380 r = vprintk(fmt, args); 2381 va_end(args); 2382 2383 return r; 2384 } 2385 EXPORT_SYMBOL(_printk); 2386 2387 static bool pr_flush(int timeout_ms, bool reset_on_progress); 2388 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); 2389 2390 #else /* CONFIG_PRINTK */ 2391 2392 #define CONSOLE_LOG_MAX 0 2393 #define DROPPED_TEXT_MAX 0 2394 #define printk_time false 2395 2396 #define prb_read_valid(rb, seq, r) false 2397 #define prb_first_valid_seq(rb) 0 2398 #define prb_next_seq(rb) 0 2399 2400 static u64 syslog_seq; 2401 2402 static size_t record_print_text(const struct printk_record *r, 2403 bool syslog, bool time) 2404 { 2405 return 0; 2406 } 2407 static ssize_t info_print_ext_header(char *buf, size_t size, 2408 struct printk_info *info) 2409 { 2410 return 0; 2411 } 2412 static ssize_t msg_print_ext_body(char *buf, size_t size, 2413 char *text, size_t text_len, 2414 struct dev_printk_info *dev_info) { return 0; } 2415 static void console_lock_spinning_enable(void) { } 2416 static int console_lock_spinning_disable_and_check(int cookie) { return 0; } 2417 static void call_console_driver(struct console *con, const char *text, size_t len, 2418 char *dropped_text) 2419 { 2420 } 2421 static bool suppress_message_printing(int level) { return false; } 2422 static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; } 2423 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } 2424 2425 #endif /* CONFIG_PRINTK */ 2426 2427 #ifdef CONFIG_EARLY_PRINTK 2428 struct console *early_console; 2429 2430 asmlinkage __visible void early_printk(const char *fmt, ...) 2431 { 2432 va_list ap; 2433 char buf[512]; 2434 int n; 2435 2436 if (!early_console) 2437 return; 2438 2439 va_start(ap, fmt); 2440 n = vscnprintf(buf, sizeof(buf), fmt, ap); 2441 va_end(ap); 2442 2443 early_console->write(early_console, buf, n); 2444 } 2445 #endif 2446 2447 static void set_user_specified(struct console_cmdline *c, bool user_specified) 2448 { 2449 if (!user_specified) 2450 return; 2451 2452 /* 2453 * @c console was defined by the user on the command line. 2454 * Do not clear when added twice also by SPCR or the device tree. 2455 */ 2456 c->user_specified = true; 2457 /* At least one console defined by the user on the command line. 
*/ 2458 console_set_on_cmdline = 1; 2459 } 2460 2461 static int __add_preferred_console(char *name, int idx, char *options, 2462 char *brl_options, bool user_specified) 2463 { 2464 struct console_cmdline *c; 2465 int i; 2466 2467 /* 2468 * See if this tty is not yet registered, and 2469 * if we have a slot free. 2470 */ 2471 for (i = 0, c = console_cmdline; 2472 i < MAX_CMDLINECONSOLES && c->name[0]; 2473 i++, c++) { 2474 if (strcmp(c->name, name) == 0 && c->index == idx) { 2475 if (!brl_options) 2476 preferred_console = i; 2477 set_user_specified(c, user_specified); 2478 return 0; 2479 } 2480 } 2481 if (i == MAX_CMDLINECONSOLES) 2482 return -E2BIG; 2483 if (!brl_options) 2484 preferred_console = i; 2485 strscpy(c->name, name, sizeof(c->name)); 2486 c->options = options; 2487 set_user_specified(c, user_specified); 2488 braille_set_options(c, brl_options); 2489 2490 c->index = idx; 2491 return 0; 2492 } 2493 2494 static int __init console_msg_format_setup(char *str) 2495 { 2496 if (!strcmp(str, "syslog")) 2497 console_msg_format = MSG_FORMAT_SYSLOG; 2498 if (!strcmp(str, "default")) 2499 console_msg_format = MSG_FORMAT_DEFAULT; 2500 return 1; 2501 } 2502 __setup("console_msg_format=", console_msg_format_setup); 2503 2504 /* 2505 * Set up a console. Called via do_early_param() in init/main.c 2506 * for each "console=" parameter in the boot command line. 2507 */ 2508 static int __init console_setup(char *str) 2509 { 2510 char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */ 2511 char *s, *options, *brl_options = NULL; 2512 int idx; 2513 2514 /* 2515 * console="" or console=null have been suggested as a way to 2516 * disable console output. Use ttynull that has been created 2517 * for exactly this purpose. 2518 */ 2519 if (str[0] == 0 || strcmp(str, "null") == 0) { 2520 __add_preferred_console("ttynull", 0, NULL, NULL, true); 2521 return 1; 2522 } 2523 2524 if (_braille_console_setup(&str, &brl_options)) 2525 return 1; 2526 2527 /* 2528 * Decode str into name, index, options. 2529 */ 2530 if (str[0] >= '0' && str[0] <= '9') { 2531 strcpy(buf, "ttyS"); 2532 strncpy(buf + 4, str, sizeof(buf) - 5); 2533 } else { 2534 strncpy(buf, str, sizeof(buf) - 1); 2535 } 2536 buf[sizeof(buf) - 1] = 0; 2537 options = strchr(str, ','); 2538 if (options) 2539 *(options++) = 0; 2540 #ifdef __sparc__ 2541 if (!strcmp(str, "ttya")) 2542 strcpy(buf, "ttyS0"); 2543 if (!strcmp(str, "ttyb")) 2544 strcpy(buf, "ttyS1"); 2545 #endif 2546 for (s = buf; *s; s++) 2547 if (isdigit(*s) || *s == ',') 2548 break; 2549 idx = simple_strtoul(s, NULL, 10); 2550 *s = 0; 2551 2552 __add_preferred_console(buf, idx, options, brl_options, true); 2553 return 1; 2554 } 2555 __setup("console=", console_setup); 2556 2557 /** 2558 * add_preferred_console - add a device to the list of preferred consoles. 2559 * @name: device name 2560 * @idx: device index 2561 * @options: options for this console 2562 * 2563 * The last preferred console added will be used for kernel messages 2564 * and stdin/out/err for init. Normally this is used by console_setup 2565 * above to handle user-supplied console arguments; however it can also 2566 * be used by arch-specific code either to override the user or more 2567 * commonly to provide a default console (ie from PROM variables) when 2568 * the user has not supplied one. 
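 *
 * A minimal sketch of the arch-code case described above; the condition
 * and the option string are hypothetical, for illustration only:
 *
 *	if (firmware_selected_serial_console())
 *		add_preferred_console("ttyS", 0, "115200n8");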
2569 */ 2570 int add_preferred_console(char *name, int idx, char *options) 2571 { 2572 return __add_preferred_console(name, idx, options, NULL, false); 2573 } 2574 2575 bool console_suspend_enabled = true; 2576 EXPORT_SYMBOL(console_suspend_enabled); 2577 2578 static int __init console_suspend_disable(char *str) 2579 { 2580 console_suspend_enabled = false; 2581 return 1; 2582 } 2583 __setup("no_console_suspend", console_suspend_disable); 2584 module_param_named(console_suspend, console_suspend_enabled, 2585 bool, S_IRUGO | S_IWUSR); 2586 MODULE_PARM_DESC(console_suspend, "suspend console during suspend" 2587 " and hibernate operations"); 2588 2589 static bool printk_console_no_auto_verbose; 2590 2591 void console_verbose(void) 2592 { 2593 if (console_loglevel && !printk_console_no_auto_verbose) 2594 console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; 2595 } 2596 EXPORT_SYMBOL_GPL(console_verbose); 2597 2598 module_param_named(console_no_auto_verbose, printk_console_no_auto_verbose, bool, 0644); 2599 MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to highest on oops/panic/etc"); 2600 2601 /** 2602 * suspend_console - suspend the console subsystem 2603 * 2604 * This disables printk() while we go into suspend states 2605 */ 2606 void suspend_console(void) 2607 { 2608 if (!console_suspend_enabled) 2609 return; 2610 pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); 2611 pr_flush(1000, true); 2612 console_lock(); 2613 console_suspended = 1; 2614 up_console_sem(); 2615 } 2616 2617 void resume_console(void) 2618 { 2619 if (!console_suspend_enabled) 2620 return; 2621 down_console_sem(); 2622 console_suspended = 0; 2623 console_unlock(); 2624 pr_flush(1000, true); 2625 } 2626 2627 /** 2628 * console_cpu_notify - print deferred console messages after CPU hotplug 2629 * @cpu: unused 2630 * 2631 * If printk() is called from a CPU that is not online yet, the messages 2632 * will be printed on the console only if there are CON_ANYTIME consoles. 2633 * This function is called when a new CPU comes online (or fails to come 2634 * up) or goes offline. 2635 */ 2636 static int console_cpu_notify(unsigned int cpu) 2637 { 2638 if (!cpuhp_tasks_frozen) { 2639 /* If trylock fails, someone else is doing the printing */ 2640 if (console_trylock()) 2641 console_unlock(); 2642 } 2643 return 0; 2644 } 2645 2646 /** 2647 * console_lock - block the console subsystem from printing 2648 * 2649 * Acquires a lock which guarantees that no consoles will 2650 * be in or enter their write() callback. 2651 * 2652 * Can sleep, returns nothing. 2653 */ 2654 void console_lock(void) 2655 { 2656 might_sleep(); 2657 2658 down_console_sem(); 2659 if (console_suspended) 2660 return; 2661 console_locked = 1; 2662 console_may_schedule = 1; 2663 } 2664 EXPORT_SYMBOL(console_lock); 2665 2666 /** 2667 * console_trylock - try to block the console subsystem from printing 2668 * 2669 * Try to acquire a lock which guarantees that no consoles will 2670 * be in or enter their write() callback. 2671 * 2672 * returns 1 on success, and 0 on failure to acquire the lock. 
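 *
 * A typical best-effort flush, as done by console_cpu_notify() and the
 * klogd irq_work below, is simply:
 *
 *	if (console_trylock())
 *		console_unlock();
 *
 * where a failed trylock means another context is already printing.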
2673 */ 2674 int console_trylock(void) 2675 { 2676 if (down_trylock_console_sem()) 2677 return 0; 2678 if (console_suspended) { 2679 up_console_sem(); 2680 return 0; 2681 } 2682 console_locked = 1; 2683 console_may_schedule = 0; 2684 return 1; 2685 } 2686 EXPORT_SYMBOL(console_trylock); 2687 2688 int is_console_locked(void) 2689 { 2690 return console_locked; 2691 } 2692 EXPORT_SYMBOL(is_console_locked); 2693 2694 /* 2695 * Return true when this CPU should unlock console_sem without pushing all 2696 * messages to the console. This reduces the chance that the console is 2697 * locked when the panic CPU tries to use it. 2698 */ 2699 static bool abandon_console_lock_in_panic(void) 2700 { 2701 if (!panic_in_progress()) 2702 return false; 2703 2704 /* 2705 * We can use raw_smp_processor_id() here because it is impossible for 2706 * the task to be migrated to the panic_cpu, or away from it. If 2707 * panic_cpu has already been set, and we're not currently executing on 2708 * that CPU, then we never will be. 2709 */ 2710 return atomic_read(&panic_cpu) != raw_smp_processor_id(); 2711 } 2712 2713 /* 2714 * Check if the given console is currently capable and allowed to print 2715 * records. 2716 * 2717 * Requires the console_srcu_read_lock. 2718 */ 2719 static inline bool console_is_usable(struct console *con) 2720 { 2721 short flags = console_srcu_read_flags(con); 2722 2723 if (!(flags & CON_ENABLED)) 2724 return false; 2725 2726 if (!con->write) 2727 return false; 2728 2729 /* 2730 * Console drivers may assume that per-cpu resources have been 2731 * allocated. So unless they're explicitly marked as being able to 2732 * cope (CON_ANYTIME) don't call them until this CPU is officially up. 2733 */ 2734 if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) 2735 return false; 2736 2737 return true; 2738 } 2739 2740 static void __console_unlock(void) 2741 { 2742 console_locked = 0; 2743 up_console_sem(); 2744 } 2745 2746 /* 2747 * Print one record for the given console. The record printed is whatever 2748 * record is the next available record for the given console. 2749 * 2750 * @text is a buffer of size CONSOLE_LOG_MAX. 2751 * 2752 * If extended messages should be printed, @ext_text is a buffer of size 2753 * CONSOLE_EXT_LOG_MAX. Otherwise @ext_text must be NULL. 2754 * 2755 * If dropped messages should be printed, @dropped_text is a buffer of size 2756 * DROPPED_TEXT_MAX. Otherwise @dropped_text must be NULL. 2757 * 2758 * @handover will be set to true if a printk waiter has taken over the 2759 * console_lock, in which case the caller is no longer holding both the 2760 * console_lock and the SRCU read lock. Otherwise it is set to false. 2761 * 2762 * @cookie is the cookie from the SRCU read lock. 2763 * 2764 * Returns false if the given console has no next record to print, otherwise 2765 * true. 2766 * 2767 * Requires the console_lock and the SRCU read lock. 
2768 */ 2769 static bool console_emit_next_record(struct console *con, char *text, char *ext_text, 2770 char *dropped_text, bool *handover, int cookie) 2771 { 2772 static int panic_console_dropped; 2773 struct printk_info info; 2774 struct printk_record r; 2775 unsigned long flags; 2776 char *write_text; 2777 size_t len; 2778 2779 prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); 2780 2781 *handover = false; 2782 2783 if (!prb_read_valid(prb, con->seq, &r)) 2784 return false; 2785 2786 if (con->seq != r.info->seq) { 2787 con->dropped += r.info->seq - con->seq; 2788 con->seq = r.info->seq; 2789 if (panic_in_progress() && panic_console_dropped++ > 10) { 2790 suppress_panic_printk = 1; 2791 pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n"); 2792 } 2793 } 2794 2795 /* Skip record that has level above the console loglevel. */ 2796 if (suppress_message_printing(r.info->level)) { 2797 con->seq++; 2798 goto skip; 2799 } 2800 2801 if (ext_text) { 2802 write_text = ext_text; 2803 len = info_print_ext_header(ext_text, CONSOLE_EXT_LOG_MAX, r.info); 2804 len += msg_print_ext_body(ext_text + len, CONSOLE_EXT_LOG_MAX - len, 2805 &r.text_buf[0], r.info->text_len, &r.info->dev_info); 2806 } else { 2807 write_text = text; 2808 len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); 2809 } 2810 2811 /* 2812 * While actively printing out messages, if another printk() 2813 * were to occur on another CPU, it may wait for this one to 2814 * finish. This task can not be preempted if there is a 2815 * waiter waiting to take over. 2816 * 2817 * Interrupts are disabled because the hand over to a waiter 2818 * must not be interrupted until the hand over is completed 2819 * (@console_waiter is cleared). 2820 */ 2821 printk_safe_enter_irqsave(flags); 2822 console_lock_spinning_enable(); 2823 2824 stop_critical_timings(); /* don't trace print latency */ 2825 call_console_driver(con, write_text, len, dropped_text); 2826 start_critical_timings(); 2827 2828 con->seq++; 2829 2830 *handover = console_lock_spinning_disable_and_check(cookie); 2831 printk_safe_exit_irqrestore(flags); 2832 skip: 2833 return true; 2834 } 2835 2836 /* 2837 * Print out all remaining records to all consoles. 2838 * 2839 * @do_cond_resched is set by the caller. It can be true only in schedulable 2840 * context. 2841 * 2842 * @next_seq is set to the sequence number after the last available record. 2843 * The value is valid only when this function returns true. It means that all 2844 * usable consoles are completely flushed. 2845 * 2846 * @handover will be set to true if a printk waiter has taken over the 2847 * console_lock, in which case the caller is no longer holding the 2848 * console_lock. Otherwise it is set to false. 2849 * 2850 * Returns true when there was at least one usable console and all messages 2851 * were flushed to all usable consoles. A returned false informs the caller 2852 * that everything was not flushed (either there were no usable consoles or 2853 * another context has taken over printing or it is a panic situation and this 2854 * is not the panic CPU). Regardless the reason, the caller should assume it 2855 * is not useful to immediately try again. 2856 * 2857 * Requires the console_lock. 
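 *
 * Illustrative calling pattern (a condensed sketch of what console_unlock()
 * below does, not an additional entry point):
 *
 *	console_lock();
 *	flushed = console_flush_all(do_cond_resched, &next_seq, &handover);
 *	if (!handover)
 *		__console_unlock();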
2858 */ 2859 static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) 2860 { 2861 static char dropped_text[DROPPED_TEXT_MAX]; 2862 static char ext_text[CONSOLE_EXT_LOG_MAX]; 2863 static char text[CONSOLE_LOG_MAX]; 2864 bool any_usable = false; 2865 struct console *con; 2866 bool any_progress; 2867 int cookie; 2868 2869 *next_seq = 0; 2870 *handover = false; 2871 2872 do { 2873 any_progress = false; 2874 2875 cookie = console_srcu_read_lock(); 2876 for_each_console_srcu(con) { 2877 bool progress; 2878 2879 if (!console_is_usable(con)) 2880 continue; 2881 any_usable = true; 2882 2883 if (console_srcu_read_flags(con) & CON_EXTENDED) { 2884 /* Extended consoles do not print "dropped messages". */ 2885 progress = console_emit_next_record(con, &text[0], 2886 &ext_text[0], NULL, 2887 handover, cookie); 2888 } else { 2889 progress = console_emit_next_record(con, &text[0], 2890 NULL, &dropped_text[0], 2891 handover, cookie); 2892 } 2893 2894 /* 2895 * If a handover has occurred, the SRCU read lock 2896 * is already released. 2897 */ 2898 if (*handover) 2899 return false; 2900 2901 /* Track the next of the highest seq flushed. */ 2902 if (con->seq > *next_seq) 2903 *next_seq = con->seq; 2904 2905 if (!progress) 2906 continue; 2907 any_progress = true; 2908 2909 /* Allow panic_cpu to take over the consoles safely. */ 2910 if (abandon_console_lock_in_panic()) 2911 goto abandon; 2912 2913 if (do_cond_resched) 2914 cond_resched(); 2915 } 2916 console_srcu_read_unlock(cookie); 2917 } while (any_progress); 2918 2919 return any_usable; 2920 2921 abandon: 2922 console_srcu_read_unlock(cookie); 2923 return false; 2924 } 2925 2926 /** 2927 * console_unlock - unblock the console subsystem from printing 2928 * 2929 * Releases the console_lock which the caller holds to block printing of 2930 * the console subsystem. 2931 * 2932 * While the console_lock was held, console output may have been buffered 2933 * by printk(). If this is the case, console_unlock(); emits 2934 * the output prior to releasing the lock. 2935 * 2936 * console_unlock(); may be called from any context. 2937 */ 2938 void console_unlock(void) 2939 { 2940 bool do_cond_resched; 2941 bool handover; 2942 bool flushed; 2943 u64 next_seq; 2944 2945 if (console_suspended) { 2946 up_console_sem(); 2947 return; 2948 } 2949 2950 /* 2951 * Console drivers are called with interrupts disabled, so 2952 * @console_may_schedule should be cleared before; however, we may 2953 * end up dumping a lot of lines, for example, if called from 2954 * console registration path, and should invoke cond_resched() 2955 * between lines if allowable. Not doing so can cause a very long 2956 * scheduling stall on a slow console leading to RCU stall and 2957 * softlockup warnings which exacerbate the issue with more 2958 * messages practically incapacitating the system. Therefore, create 2959 * a local to use for the printing loop. 2960 */ 2961 do_cond_resched = console_may_schedule; 2962 2963 do { 2964 console_may_schedule = 0; 2965 2966 flushed = console_flush_all(do_cond_resched, &next_seq, &handover); 2967 if (!handover) 2968 __console_unlock(); 2969 2970 /* 2971 * Abort if there was a failure to flush all messages to all 2972 * usable consoles. Either it is not possible to flush (in 2973 * which case it would be an infinite loop of retrying) or 2974 * another context has taken over printing. 
2975 */ 2976 if (!flushed) 2977 break; 2978 2979 /* 2980 * Some context may have added new records after 2981 * console_flush_all() but before unlocking the console. 2982 * Re-check if there is a new record to flush. If the trylock 2983 * fails, another context is already handling the printing. 2984 */ 2985 } while (prb_read_valid(prb, next_seq, NULL) && console_trylock()); 2986 } 2987 EXPORT_SYMBOL(console_unlock); 2988 2989 /** 2990 * console_conditional_schedule - yield the CPU if required 2991 * 2992 * If the console code is currently allowed to sleep, and 2993 * if this CPU should yield the CPU to another task, do 2994 * so here. 2995 * 2996 * Must be called within console_lock();. 2997 */ 2998 void __sched console_conditional_schedule(void) 2999 { 3000 if (console_may_schedule) 3001 cond_resched(); 3002 } 3003 EXPORT_SYMBOL(console_conditional_schedule); 3004 3005 void console_unblank(void) 3006 { 3007 struct console *c; 3008 int cookie; 3009 3010 /* 3011 * Stop console printing because the unblank() callback may 3012 * assume the console is not within its write() callback. 3013 * 3014 * If @oops_in_progress is set, this may be an atomic context. 3015 * In that case, attempt a trylock as best-effort. 3016 */ 3017 if (oops_in_progress) { 3018 if (down_trylock_console_sem() != 0) 3019 return; 3020 } else 3021 console_lock(); 3022 3023 console_locked = 1; 3024 console_may_schedule = 0; 3025 3026 cookie = console_srcu_read_lock(); 3027 for_each_console_srcu(c) { 3028 if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) 3029 c->unblank(); 3030 } 3031 console_srcu_read_unlock(cookie); 3032 3033 console_unlock(); 3034 3035 if (!oops_in_progress) 3036 pr_flush(1000, true); 3037 } 3038 3039 /** 3040 * console_flush_on_panic - flush console content on panic 3041 * @mode: flush all messages in buffer or just the pending ones 3042 * 3043 * Immediately output all pending messages no matter what. 3044 */ 3045 void console_flush_on_panic(enum con_flush_mode mode) 3046 { 3047 /* 3048 * If someone else is holding the console lock, trylock will fail 3049 * and may_schedule may be set. Ignore and proceed to unlock so 3050 * that messages are flushed out. As this can be called from any 3051 * context and we don't want to get preempted while flushing, 3052 * ensure may_schedule is cleared. 3053 */ 3054 console_trylock(); 3055 console_may_schedule = 0; 3056 3057 if (mode == CONSOLE_REPLAY_ALL) { 3058 struct console *c; 3059 int cookie; 3060 u64 seq; 3061 3062 seq = prb_first_valid_seq(prb); 3063 3064 cookie = console_srcu_read_lock(); 3065 for_each_console_srcu(c) { 3066 /* 3067 * If the above console_trylock() failed, this is an 3068 * unsynchronized assignment. But in that case, the 3069 * kernel is in "hope and pray" mode anyway. 3070 */ 3071 c->seq = seq; 3072 } 3073 console_srcu_read_unlock(cookie); 3074 } 3075 console_unlock(); 3076 } 3077 3078 /* 3079 * Return the console tty driver structure and its associated index 3080 */ 3081 struct tty_driver *console_device(int *index) 3082 { 3083 struct console *c; 3084 struct tty_driver *driver = NULL; 3085 int cookie; 3086 3087 /* 3088 * Take console_lock to serialize device() callback with 3089 * other console operations. For example, fg_console is 3090 * modified under console_lock when switching vt. 
3091 */ 3092 console_lock(); 3093 3094 cookie = console_srcu_read_lock(); 3095 for_each_console_srcu(c) { 3096 if (!c->device) 3097 continue; 3098 driver = c->device(c, index); 3099 if (driver) 3100 break; 3101 } 3102 console_srcu_read_unlock(cookie); 3103 3104 console_unlock(); 3105 return driver; 3106 } 3107 3108 /* 3109 * Prevent further output on the passed console device so that (for example) 3110 * serial drivers can disable console output before suspending a port, and can 3111 * re-enable output afterwards. 3112 */ 3113 void console_stop(struct console *console) 3114 { 3115 __pr_flush(console, 1000, true); 3116 console_list_lock(); 3117 console_srcu_write_flags(console, console->flags & ~CON_ENABLED); 3118 console_list_unlock(); 3119 3120 /* 3121 * Ensure that all SRCU list walks have completed. All contexts must 3122 * be able to see that this console is disabled so that (for example) 3123 * the caller can suspend the port without risk of another context 3124 * using the port. 3125 */ 3126 synchronize_srcu(&console_srcu); 3127 } 3128 EXPORT_SYMBOL(console_stop); 3129 3130 void console_start(struct console *console) 3131 { 3132 console_list_lock(); 3133 console_srcu_write_flags(console, console->flags | CON_ENABLED); 3134 console_list_unlock(); 3135 __pr_flush(console, 1000, true); 3136 } 3137 EXPORT_SYMBOL(console_start); 3138 3139 static int __read_mostly keep_bootcon; 3140 3141 static int __init keep_bootcon_setup(char *str) 3142 { 3143 keep_bootcon = 1; 3144 pr_info("debug: skip boot console de-registration.\n"); 3145 3146 return 0; 3147 } 3148 3149 early_param("keep_bootcon", keep_bootcon_setup); 3150 3151 /* 3152 * This is called by register_console() to try to match 3153 * the newly registered console with any of the ones selected 3154 * by either the command line or add_preferred_console() and 3155 * setup/enable it. 3156 * 3157 * Care need to be taken with consoles that are statically 3158 * enabled such as netconsole 3159 */ 3160 static int try_enable_preferred_console(struct console *newcon, 3161 bool user_specified) 3162 { 3163 struct console_cmdline *c; 3164 int i, err; 3165 3166 for (i = 0, c = console_cmdline; 3167 i < MAX_CMDLINECONSOLES && c->name[0]; 3168 i++, c++) { 3169 if (c->user_specified != user_specified) 3170 continue; 3171 if (!newcon->match || 3172 newcon->match(newcon, c->name, c->index, c->options) != 0) { 3173 /* default matching */ 3174 BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); 3175 if (strcmp(c->name, newcon->name) != 0) 3176 continue; 3177 if (newcon->index >= 0 && 3178 newcon->index != c->index) 3179 continue; 3180 if (newcon->index < 0) 3181 newcon->index = c->index; 3182 3183 if (_braille_register_console(newcon, c)) 3184 return 0; 3185 3186 if (newcon->setup && 3187 (err = newcon->setup(newcon, c->options)) != 0) 3188 return err; 3189 } 3190 newcon->flags |= CON_ENABLED; 3191 if (i == preferred_console) 3192 newcon->flags |= CON_CONSDEV; 3193 return 0; 3194 } 3195 3196 /* 3197 * Some consoles, such as pstore and netconsole, can be enabled even 3198 * without matching. Accept the pre-enabled consoles only when match() 3199 * and setup() had a chance to be called. 
3200 */ 3201 if (newcon->flags & CON_ENABLED && c->user_specified == user_specified) 3202 return 0; 3203 3204 return -ENOENT; 3205 } 3206 3207 /* Try to enable the console unconditionally */ 3208 static void try_enable_default_console(struct console *newcon) 3209 { 3210 if (newcon->index < 0) 3211 newcon->index = 0; 3212 3213 if (newcon->setup && newcon->setup(newcon, NULL) != 0) 3214 return; 3215 3216 newcon->flags |= CON_ENABLED; 3217 3218 if (newcon->device) 3219 newcon->flags |= CON_CONSDEV; 3220 } 3221 3222 #define con_printk(lvl, con, fmt, ...) \ 3223 printk(lvl pr_fmt("%sconsole [%s%d] " fmt), \ 3224 (con->flags & CON_BOOT) ? "boot" : "", \ 3225 con->name, con->index, ##__VA_ARGS__) 3226 3227 static void console_init_seq(struct console *newcon, bool bootcon_registered) 3228 { 3229 struct console *con; 3230 bool handover; 3231 3232 if (newcon->flags & (CON_PRINTBUFFER | CON_BOOT)) { 3233 /* Get a consistent copy of @syslog_seq. */ 3234 mutex_lock(&syslog_lock); 3235 newcon->seq = syslog_seq; 3236 mutex_unlock(&syslog_lock); 3237 } else { 3238 /* Begin with next message added to ringbuffer. */ 3239 newcon->seq = prb_next_seq(prb); 3240 3241 /* 3242 * If any enabled boot consoles are due to be unregistered 3243 * shortly, some may not be caught up and may be the same 3244 * device as @newcon. Since it is not known which boot console 3245 * is the same device, flush all consoles and, if necessary, 3246 * start with the message of the enabled boot console that is 3247 * the furthest behind. 3248 */ 3249 if (bootcon_registered && !keep_bootcon) { 3250 /* 3251 * Hold the console_lock to stop console printing and 3252 * guarantee safe access to console->seq. 3253 */ 3254 console_lock(); 3255 3256 /* 3257 * Flush all consoles and set the console to start at 3258 * the next unprinted sequence number. 3259 */ 3260 if (!console_flush_all(true, &newcon->seq, &handover)) { 3261 /* 3262 * Flushing failed. Just choose the lowest 3263 * sequence of the enabled boot consoles. 3264 */ 3265 3266 /* 3267 * If there was a handover, this context no 3268 * longer holds the console_lock. 3269 */ 3270 if (handover) 3271 console_lock(); 3272 3273 newcon->seq = prb_next_seq(prb); 3274 for_each_console(con) { 3275 if ((con->flags & CON_BOOT) && 3276 (con->flags & CON_ENABLED) && 3277 con->seq < newcon->seq) { 3278 newcon->seq = con->seq; 3279 } 3280 } 3281 } 3282 3283 console_unlock(); 3284 } 3285 } 3286 } 3287 3288 #define console_first() \ 3289 hlist_entry(console_list.first, struct console, node) 3290 3291 static int unregister_console_locked(struct console *console); 3292 3293 /* 3294 * The console driver calls this routine during kernel initialization 3295 * to register the console printing procedure with printk() and to 3296 * print any messages that were printed by the kernel before the 3297 * console driver was initialized. 3298 * 3299 * This can happen pretty early during the boot process (because of 3300 * early_printk) - sometimes before setup_arch() completes - be careful 3301 * of what kernel features are used - they may not be initialised yet. 3302 * 3303 * There are two types of consoles - bootconsoles (early_printk) and 3304 * "real" consoles (everything which is not a bootconsole) which are 3305 * handled differently. 3306 * - Any number of bootconsoles can be registered at any time. 3307 * - As soon as a "real" console is registered, all bootconsoles 3308 * will be unregistered automatically. 
3309 * - Once a "real" console is registered, any attempt to register a 3310 * bootconsoles will be rejected 3311 */ 3312 void register_console(struct console *newcon) 3313 { 3314 struct console *con; 3315 bool bootcon_registered = false; 3316 bool realcon_registered = false; 3317 int err; 3318 3319 console_list_lock(); 3320 3321 for_each_console(con) { 3322 if (WARN(con == newcon, "console '%s%d' already registered\n", 3323 con->name, con->index)) { 3324 goto unlock; 3325 } 3326 3327 if (con->flags & CON_BOOT) 3328 bootcon_registered = true; 3329 else 3330 realcon_registered = true; 3331 } 3332 3333 /* Do not register boot consoles when there already is a real one. */ 3334 if ((newcon->flags & CON_BOOT) && realcon_registered) { 3335 pr_info("Too late to register bootconsole %s%d\n", 3336 newcon->name, newcon->index); 3337 goto unlock; 3338 } 3339 3340 /* 3341 * See if we want to enable this console driver by default. 3342 * 3343 * Nope when a console is preferred by the command line, device 3344 * tree, or SPCR. 3345 * 3346 * The first real console with tty binding (driver) wins. More 3347 * consoles might get enabled before the right one is found. 3348 * 3349 * Note that a console with tty binding will have CON_CONSDEV 3350 * flag set and will be first in the list. 3351 */ 3352 if (preferred_console < 0) { 3353 if (hlist_empty(&console_list) || !console_first()->device || 3354 console_first()->flags & CON_BOOT) { 3355 try_enable_default_console(newcon); 3356 } 3357 } 3358 3359 /* See if this console matches one we selected on the command line */ 3360 err = try_enable_preferred_console(newcon, true); 3361 3362 /* If not, try to match against the platform default(s) */ 3363 if (err == -ENOENT) 3364 err = try_enable_preferred_console(newcon, false); 3365 3366 /* printk() messages are not printed to the Braille console. */ 3367 if (err || newcon->flags & CON_BRL) 3368 goto unlock; 3369 3370 /* 3371 * If we have a bootconsole, and are switching to a real console, 3372 * don't print everything out again, since when the boot console, and 3373 * the real console are the same physical device, it's annoying to 3374 * see the beginning boot messages twice 3375 */ 3376 if (bootcon_registered && 3377 ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) { 3378 newcon->flags &= ~CON_PRINTBUFFER; 3379 } 3380 3381 newcon->dropped = 0; 3382 console_init_seq(newcon, bootcon_registered); 3383 3384 /* 3385 * Put this console in the list - keep the 3386 * preferred driver at the head of the list. 3387 */ 3388 if (hlist_empty(&console_list)) { 3389 /* Ensure CON_CONSDEV is always set for the head. */ 3390 newcon->flags |= CON_CONSDEV; 3391 hlist_add_head_rcu(&newcon->node, &console_list); 3392 3393 } else if (newcon->flags & CON_CONSDEV) { 3394 /* Only the new head can have CON_CONSDEV set. */ 3395 console_srcu_write_flags(console_first(), console_first()->flags & ~CON_CONSDEV); 3396 hlist_add_head_rcu(&newcon->node, &console_list); 3397 3398 } else { 3399 hlist_add_behind_rcu(&newcon->node, console_list.first); 3400 } 3401 3402 /* 3403 * No need to synchronize SRCU here! The caller does not rely 3404 * on all contexts being able to see the new console before 3405 * register_console() completes. 
3406 */ 3407 3408 console_sysfs_notify(); 3409 3410 /* 3411 * By unregistering the bootconsoles after we enable the real console 3412 * we get the "console xxx enabled" message on all the consoles - 3413 * boot consoles, real consoles, etc - this is to ensure that end 3414 * users know there might be something in the kernel's log buffer that 3415 * went to the bootconsole (that they do not see on the real console) 3416 */ 3417 con_printk(KERN_INFO, newcon, "enabled\n"); 3418 if (bootcon_registered && 3419 ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) && 3420 !keep_bootcon) { 3421 struct hlist_node *tmp; 3422 3423 hlist_for_each_entry_safe(con, tmp, &console_list, node) { 3424 if (con->flags & CON_BOOT) 3425 unregister_console_locked(con); 3426 } 3427 } 3428 unlock: 3429 console_list_unlock(); 3430 } 3431 EXPORT_SYMBOL(register_console); 3432 3433 /* Must be called under console_list_lock(). */ 3434 static int unregister_console_locked(struct console *console) 3435 { 3436 int res; 3437 3438 lockdep_assert_console_list_lock_held(); 3439 3440 con_printk(KERN_INFO, console, "disabled\n"); 3441 3442 res = _braille_unregister_console(console); 3443 if (res < 0) 3444 return res; 3445 if (res > 0) 3446 return 0; 3447 3448 /* Disable it unconditionally */ 3449 console_srcu_write_flags(console, console->flags & ~CON_ENABLED); 3450 3451 if (!console_is_registered_locked(console)) 3452 return -ENODEV; 3453 3454 hlist_del_init_rcu(&console->node); 3455 3456 /* 3457 * <HISTORICAL> 3458 * If this isn't the last console and it has CON_CONSDEV set, we 3459 * need to set it on the next preferred console. 3460 * </HISTORICAL> 3461 * 3462 * The above makes no sense as there is no guarantee that the next 3463 * console has any device attached. Oh well.... 3464 */ 3465 if (!hlist_empty(&console_list) && console->flags & CON_CONSDEV) 3466 console_srcu_write_flags(console_first(), console_first()->flags | CON_CONSDEV); 3467 3468 /* 3469 * Ensure that all SRCU list walks have completed. All contexts 3470 * must not be able to see this console in the list so that any 3471 * exit/cleanup routines can be performed safely. 3472 */ 3473 synchronize_srcu(&console_srcu); 3474 3475 console_sysfs_notify(); 3476 3477 if (console->exit) 3478 res = console->exit(console); 3479 3480 return res; 3481 } 3482 3483 int unregister_console(struct console *console) 3484 { 3485 int res; 3486 3487 console_list_lock(); 3488 res = unregister_console_locked(console); 3489 console_list_unlock(); 3490 return res; 3491 } 3492 EXPORT_SYMBOL(unregister_console); 3493 3494 /** 3495 * console_force_preferred_locked - force a registered console preferred 3496 * @con: The registered console to force preferred. 3497 * 3498 * Must be called under console_list_lock(). 3499 */ 3500 void console_force_preferred_locked(struct console *con) 3501 { 3502 struct console *cur_pref_con; 3503 3504 if (!console_is_registered_locked(con)) 3505 return; 3506 3507 cur_pref_con = console_first(); 3508 3509 /* Already preferred? */ 3510 if (cur_pref_con == con) 3511 return; 3512 3513 /* 3514 * Delete, but do not re-initialize the entry. This allows the console 3515 * to continue to appear registered (via any hlist_unhashed_lockless() 3516 * checks), even though it was briefly removed from the console list. 3517 */ 3518 hlist_del_rcu(&con->node); 3519 3520 /* 3521 * Ensure that all SRCU list walks have completed so that the console 3522 * can be added to the beginning of the console list and its forward 3523 * list pointer can be re-initialized. 
3524 */ 3525 synchronize_srcu(&console_srcu); 3526 3527 con->flags |= CON_CONSDEV; 3528 WARN_ON(!con->device); 3529 3530 /* Only the new head can have CON_CONSDEV set. */ 3531 console_srcu_write_flags(cur_pref_con, cur_pref_con->flags & ~CON_CONSDEV); 3532 hlist_add_head_rcu(&con->node, &console_list); 3533 } 3534 EXPORT_SYMBOL(console_force_preferred_locked); 3535 3536 /* 3537 * Initialize the console device. This is called *early*, so 3538 * we can't necessarily depend on lots of kernel help here. 3539 * Just do some early initializations, and do the complex setup 3540 * later. 3541 */ 3542 void __init console_init(void) 3543 { 3544 int ret; 3545 initcall_t call; 3546 initcall_entry_t *ce; 3547 3548 /* Setup the default TTY line discipline. */ 3549 n_tty_init(); 3550 3551 /* 3552 * set up the console device so that later boot sequences can 3553 * inform about problems etc.. 3554 */ 3555 ce = __con_initcall_start; 3556 trace_initcall_level("console"); 3557 while (ce < __con_initcall_end) { 3558 call = initcall_from_entry(ce); 3559 trace_initcall_start(call); 3560 ret = call(); 3561 trace_initcall_finish(call, ret); 3562 ce++; 3563 } 3564 } 3565 3566 /* 3567 * Some boot consoles access data that is in the init section and which will 3568 * be discarded after the initcalls have been run. To make sure that no code 3569 * will access this data, unregister the boot consoles in a late initcall. 3570 * 3571 * If for some reason, such as deferred probe or the driver being a loadable 3572 * module, the real console hasn't registered yet at this point, there will 3573 * be a brief interval in which no messages are logged to the console, which 3574 * makes it difficult to diagnose problems that occur during this time. 3575 * 3576 * To mitigate this problem somewhat, only unregister consoles whose memory 3577 * intersects with the init section. Note that all other boot consoles will 3578 * get unregistered when the real preferred console is registered. 3579 */ 3580 static int __init printk_late_init(void) 3581 { 3582 struct hlist_node *tmp; 3583 struct console *con; 3584 int ret; 3585 3586 console_list_lock(); 3587 hlist_for_each_entry_safe(con, tmp, &console_list, node) { 3588 if (!(con->flags & CON_BOOT)) 3589 continue; 3590 3591 /* Check addresses that might be used for enabled consoles. */ 3592 if (init_section_intersects(con, sizeof(*con)) || 3593 init_section_contains(con->write, 0) || 3594 init_section_contains(con->read, 0) || 3595 init_section_contains(con->device, 0) || 3596 init_section_contains(con->unblank, 0) || 3597 init_section_contains(con->data, 0)) { 3598 /* 3599 * Please, consider moving the reported consoles out 3600 * of the init section. 3601 */ 3602 pr_warn("bootconsole [%s%d] uses init memory and must be disabled even before the real one is ready\n", 3603 con->name, con->index); 3604 unregister_console_locked(con); 3605 } 3606 } 3607 console_list_unlock(); 3608 3609 ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, 3610 console_cpu_notify); 3611 WARN_ON(ret < 0); 3612 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online", 3613 console_cpu_notify, NULL); 3614 WARN_ON(ret < 0); 3615 printk_sysctl_init(); 3616 return 0; 3617 } 3618 late_initcall(printk_late_init); 3619 3620 #if defined CONFIG_PRINTK 3621 /* If @con is specified, only wait for that console. Otherwise wait for all. 
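 *
 * For example, console_stop() above flushes a single console via
 * __pr_flush(console, 1000, true), while pr_flush() below passes
 * @con == NULL to wait for all usable consoles.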
*/ 3622 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) 3623 { 3624 int remaining = timeout_ms; 3625 struct console *c; 3626 u64 last_diff = 0; 3627 u64 printk_seq; 3628 int cookie; 3629 u64 diff; 3630 u64 seq; 3631 3632 might_sleep(); 3633 3634 seq = prb_next_seq(prb); 3635 3636 for (;;) { 3637 diff = 0; 3638 3639 /* 3640 * Hold the console_lock to guarantee safe access to 3641 * console->seq and to prevent changes to @console_suspended 3642 * until all consoles have been processed. 3643 */ 3644 console_lock(); 3645 3646 cookie = console_srcu_read_lock(); 3647 for_each_console_srcu(c) { 3648 if (con && con != c) 3649 continue; 3650 if (!console_is_usable(c)) 3651 continue; 3652 printk_seq = c->seq; 3653 if (printk_seq < seq) 3654 diff += seq - printk_seq; 3655 } 3656 console_srcu_read_unlock(cookie); 3657 3658 /* 3659 * If consoles are suspended, it cannot be expected that they 3660 * make forward progress, so timeout immediately. @diff is 3661 * still used to return a valid flush status. 3662 */ 3663 if (console_suspended) 3664 remaining = 0; 3665 else if (diff != last_diff && reset_on_progress) 3666 remaining = timeout_ms; 3667 3668 console_unlock(); 3669 3670 if (diff == 0 || remaining == 0) 3671 break; 3672 3673 if (remaining < 0) { 3674 /* no timeout limit */ 3675 msleep(100); 3676 } else if (remaining < 100) { 3677 msleep(remaining); 3678 remaining = 0; 3679 } else { 3680 msleep(100); 3681 remaining -= 100; 3682 } 3683 3684 last_diff = diff; 3685 } 3686 3687 return (diff == 0); 3688 } 3689 3690 /** 3691 * pr_flush() - Wait for printing threads to catch up. 3692 * 3693 * @timeout_ms: The maximum time (in ms) to wait. 3694 * @reset_on_progress: Reset the timeout if forward progress is seen. 3695 * 3696 * A value of 0 for @timeout_ms means no waiting will occur. A value of -1 3697 * represents infinite waiting. 3698 * 3699 * If @reset_on_progress is true, the timeout will be reset whenever any 3700 * printer has been seen to make some forward progress. 3701 * 3702 * Context: Process context. May sleep while acquiring console lock. 3703 * Return: true if all enabled printers are caught up. 3704 */ 3705 static bool pr_flush(int timeout_ms, bool reset_on_progress) 3706 { 3707 return __pr_flush(NULL, timeout_ms, reset_on_progress); 3708 } 3709 3710 /* 3711 * Delayed printk version, for scheduler-internal messages: 3712 */ 3713 #define PRINTK_PENDING_WAKEUP 0x01 3714 #define PRINTK_PENDING_OUTPUT 0x02 3715 3716 static DEFINE_PER_CPU(int, printk_pending); 3717 3718 static void wake_up_klogd_work_func(struct irq_work *irq_work) 3719 { 3720 int pending = this_cpu_xchg(printk_pending, 0); 3721 3722 if (pending & PRINTK_PENDING_OUTPUT) { 3723 /* If trylock fails, someone else is doing the printing */ 3724 if (console_trylock()) 3725 console_unlock(); 3726 } 3727 3728 if (pending & PRINTK_PENDING_WAKEUP) 3729 wake_up_interruptible(&log_wait); 3730 } 3731 3732 static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = 3733 IRQ_WORK_INIT_LAZY(wake_up_klogd_work_func); 3734 3735 static void __wake_up_klogd(int val) 3736 { 3737 if (!printk_percpu_data_ready()) 3738 return; 3739 3740 preempt_disable(); 3741 /* 3742 * Guarantee any new records can be seen by tasks preparing to wait 3743 * before this context checks if the wait queue is empty. 
3744 * 3745 * The full memory barrier within wq_has_sleeper() pairs with the full 3746 * memory barrier within set_current_state() of 3747 * prepare_to_wait_event(), which is called after ___wait_event() adds 3748 * the waiter but before it has checked the wait condition. 3749 * 3750 * This pairs with devkmsg_read:A and syslog_print:A. 3751 */ 3752 if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */ 3753 (val & PRINTK_PENDING_OUTPUT)) { 3754 this_cpu_or(printk_pending, val); 3755 irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); 3756 } 3757 preempt_enable(); 3758 } 3759 3760 void wake_up_klogd(void) 3761 { 3762 __wake_up_klogd(PRINTK_PENDING_WAKEUP); 3763 } 3764 3765 void defer_console_output(void) 3766 { 3767 /* 3768 * New messages may have been added directly to the ringbuffer 3769 * using vprintk_store(), so wake any waiters as well. 3770 */ 3771 __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); 3772 } 3773 3774 void printk_trigger_flush(void) 3775 { 3776 defer_console_output(); 3777 } 3778 3779 int vprintk_deferred(const char *fmt, va_list args) 3780 { 3781 int r; 3782 3783 r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); 3784 defer_console_output(); 3785 3786 return r; 3787 } 3788 3789 int _printk_deferred(const char *fmt, ...) 3790 { 3791 va_list args; 3792 int r; 3793 3794 va_start(args, fmt); 3795 r = vprintk_deferred(fmt, args); 3796 va_end(args); 3797 3798 return r; 3799 } 3800 3801 /* 3802 * printk rate limiting, lifted from the networking subsystem. 3803 * 3804 * This enforces a rate limit: not more than 10 kernel messages 3805 * every 5s to make a denial-of-service attack impossible. 3806 */ 3807 DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); 3808 3809 int __printk_ratelimit(const char *func) 3810 { 3811 return ___ratelimit(&printk_ratelimit_state, func); 3812 } 3813 EXPORT_SYMBOL(__printk_ratelimit); 3814 3815 /** 3816 * printk_timed_ratelimit - caller-controlled printk ratelimiting 3817 * @caller_jiffies: pointer to caller's state 3818 * @interval_msecs: minimum interval between prints 3819 * 3820 * printk_timed_ratelimit() returns true if more than @interval_msecs 3821 * milliseconds have elapsed since the last time printk_timed_ratelimit() 3822 * returned true. 3823 */ 3824 bool printk_timed_ratelimit(unsigned long *caller_jiffies, 3825 unsigned int interval_msecs) 3826 { 3827 unsigned long elapsed = jiffies - *caller_jiffies; 3828 3829 if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs)) 3830 return false; 3831 3832 *caller_jiffies = jiffies; 3833 return true; 3834 } 3835 EXPORT_SYMBOL(printk_timed_ratelimit); 3836 3837 static DEFINE_SPINLOCK(dump_list_lock); 3838 static LIST_HEAD(dump_list); 3839 3840 /** 3841 * kmsg_dump_register - register a kernel log dumper. 3842 * @dumper: pointer to the kmsg_dumper structure 3843 * 3844 * Adds a kernel log dumper to the system. The dump callback in the 3845 * structure will be called when the kernel oopses or panics and must be 3846 * set. Returns zero on success and %-EINVAL or %-EBUSY otherwise. 
3847 */ 3848 int kmsg_dump_register(struct kmsg_dumper *dumper) 3849 { 3850 unsigned long flags; 3851 int err = -EBUSY; 3852 3853 /* The dump callback needs to be set */ 3854 if (!dumper->dump) 3855 return -EINVAL; 3856 3857 spin_lock_irqsave(&dump_list_lock, flags); 3858 /* Don't allow registering multiple times */ 3859 if (!dumper->registered) { 3860 dumper->registered = 1; 3861 list_add_tail_rcu(&dumper->list, &dump_list); 3862 err = 0; 3863 } 3864 spin_unlock_irqrestore(&dump_list_lock, flags); 3865 3866 return err; 3867 } 3868 EXPORT_SYMBOL_GPL(kmsg_dump_register); 3869 3870 /** 3871 * kmsg_dump_unregister - unregister a kmsg dumper. 3872 * @dumper: pointer to the kmsg_dumper structure 3873 * 3874 * Removes a dump device from the system. Returns zero on success and 3875 * %-EINVAL otherwise. 3876 */ 3877 int kmsg_dump_unregister(struct kmsg_dumper *dumper) 3878 { 3879 unsigned long flags; 3880 int err = -EINVAL; 3881 3882 spin_lock_irqsave(&dump_list_lock, flags); 3883 if (dumper->registered) { 3884 dumper->registered = 0; 3885 list_del_rcu(&dumper->list); 3886 err = 0; 3887 } 3888 spin_unlock_irqrestore(&dump_list_lock, flags); 3889 synchronize_rcu(); 3890 3891 return err; 3892 } 3893 EXPORT_SYMBOL_GPL(kmsg_dump_unregister); 3894 3895 static bool always_kmsg_dump; 3896 module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); 3897 3898 const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) 3899 { 3900 switch (reason) { 3901 case KMSG_DUMP_PANIC: 3902 return "Panic"; 3903 case KMSG_DUMP_OOPS: 3904 return "Oops"; 3905 case KMSG_DUMP_EMERG: 3906 return "Emergency"; 3907 case KMSG_DUMP_SHUTDOWN: 3908 return "Shutdown"; 3909 default: 3910 return "Unknown"; 3911 } 3912 } 3913 EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); 3914 3915 /** 3916 * kmsg_dump - dump kernel log to kernel message dumpers. 3917 * @reason: the reason (oops, panic etc) for dumping 3918 * 3919 * Call each of the registered dumper's dump() callback, which can 3920 * retrieve the kmsg records with kmsg_dump_get_line() or 3921 * kmsg_dump_get_buffer(). 3922 */ 3923 void kmsg_dump(enum kmsg_dump_reason reason) 3924 { 3925 struct kmsg_dumper *dumper; 3926 3927 rcu_read_lock(); 3928 list_for_each_entry_rcu(dumper, &dump_list, list) { 3929 enum kmsg_dump_reason max_reason = dumper->max_reason; 3930 3931 /* 3932 * If client has not provided a specific max_reason, default 3933 * to KMSG_DUMP_OOPS, unless always_kmsg_dump was set. 3934 */ 3935 if (max_reason == KMSG_DUMP_UNDEF) { 3936 max_reason = always_kmsg_dump ? KMSG_DUMP_MAX : 3937 KMSG_DUMP_OOPS; 3938 } 3939 if (reason > max_reason) 3940 continue; 3941 3942 /* invoke dumper which will iterate over records */ 3943 dumper->dump(dumper, reason); 3944 } 3945 rcu_read_unlock(); 3946 } 3947 3948 /** 3949 * kmsg_dump_get_line - retrieve one kmsg log line 3950 * @iter: kmsg dump iterator 3951 * @syslog: include the "<4>" prefixes 3952 * @line: buffer to copy the line to 3953 * @size: maximum size of the buffer 3954 * @len: length of line placed into buffer 3955 * 3956 * Start at the beginning of the kmsg buffer, with the oldest kmsg 3957 * record, and copy one record into the provided buffer. 3958 * 3959 * Consecutive calls will return the next available record moving 3960 * towards the end of the buffer with the youngest messages. 3961 * 3962 * A return value of FALSE indicates that there are no more records to 3963 * read. 
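 *
 * A sketch of the usual consumption loop inside a dumper's dump()
 * callback; the buffer size and the emit helper are illustrative only:
 *
 *	struct kmsg_dump_iter iter;
 *	char line[200];
 *	size_t len;
 *
 *	kmsg_dump_rewind(&iter);
 *	while (kmsg_dump_get_line(&iter, true, line, sizeof(line), &len))
 *		emit_line_somewhere(line, len);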
3964 */ 3965 bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, 3966 char *line, size_t size, size_t *len) 3967 { 3968 u64 min_seq = latched_seq_read_nolock(&clear_seq); 3969 struct printk_info info; 3970 unsigned int line_count; 3971 struct printk_record r; 3972 size_t l = 0; 3973 bool ret = false; 3974 3975 if (iter->cur_seq < min_seq) 3976 iter->cur_seq = min_seq; 3977 3978 prb_rec_init_rd(&r, &info, line, size); 3979 3980 /* Read text or count text lines? */ 3981 if (line) { 3982 if (!prb_read_valid(prb, iter->cur_seq, &r)) 3983 goto out; 3984 l = record_print_text(&r, syslog, printk_time); 3985 } else { 3986 if (!prb_read_valid_info(prb, iter->cur_seq, 3987 &info, &line_count)) { 3988 goto out; 3989 } 3990 l = get_record_print_text_size(&info, line_count, syslog, 3991 printk_time); 3992 3993 } 3994 3995 iter->cur_seq = r.info->seq + 1; 3996 ret = true; 3997 out: 3998 if (len) 3999 *len = l; 4000 return ret; 4001 } 4002 EXPORT_SYMBOL_GPL(kmsg_dump_get_line); 4003 4004 /** 4005 * kmsg_dump_get_buffer - copy kmsg log lines 4006 * @iter: kmsg dump iterator 4007 * @syslog: include the "<4>" prefixes 4008 * @buf: buffer to copy the line to 4009 * @size: maximum size of the buffer 4010 * @len_out: length of line placed into buffer 4011 * 4012 * Start at the end of the kmsg buffer and fill the provided buffer 4013 * with as many of the *youngest* kmsg records that fit into it. 4014 * If the buffer is large enough, all available kmsg records will be 4015 * copied with a single call. 4016 * 4017 * Consecutive calls will fill the buffer with the next block of 4018 * available older records, not including the earlier retrieved ones. 4019 * 4020 * A return value of FALSE indicates that there are no more records to 4021 * read. 4022 */ 4023 bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, 4024 char *buf, size_t size, size_t *len_out) 4025 { 4026 u64 min_seq = latched_seq_read_nolock(&clear_seq); 4027 struct printk_info info; 4028 struct printk_record r; 4029 u64 seq; 4030 u64 next_seq; 4031 size_t len = 0; 4032 bool ret = false; 4033 bool time = printk_time; 4034 4035 if (!buf || !size) 4036 goto out; 4037 4038 if (iter->cur_seq < min_seq) 4039 iter->cur_seq = min_seq; 4040 4041 if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { 4042 if (info.seq != iter->cur_seq) { 4043 /* messages are gone, move to first available one */ 4044 iter->cur_seq = info.seq; 4045 } 4046 } 4047 4048 /* last entry */ 4049 if (iter->cur_seq >= iter->next_seq) 4050 goto out; 4051 4052 /* 4053 * Find first record that fits, including all following records, 4054 * into the user-provided buffer for this dump. Pass in size-1 4055 * because this function (by way of record_print_text()) will 4056 * not write more than size-1 bytes of text into @buf. 4057 */ 4058 seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq, 4059 size - 1, syslog, time); 4060 4061 /* 4062 * Next kmsg_dump_get_buffer() invocation will dump block of 4063 * older records stored right before this one. 4064 */ 4065 next_seq = seq; 4066 4067 prb_rec_init_rd(&r, &info, buf, size); 4068 4069 len = 0; 4070 prb_for_each_record(seq, prb, seq, &r) { 4071 if (r.info->seq >= iter->next_seq) 4072 break; 4073 4074 len += record_print_text(&r, syslog, time); 4075 4076 /* Adjust record to store to remaining buffer space. 
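	 * record_print_text() has just formatted the record into the buffer
	 * at the old offset and len now covers that text as well, so
	 * re-initialize the read descriptor at buf + len with the remaining
	 * size; the next record read by the loop then lands directly behind
	 * the text already placed in the buffer.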
*/ 4077 prb_rec_init_rd(&r, &info, buf + len, size - len); 4078 } 4079 4080 iter->next_seq = next_seq; 4081 ret = true; 4082 out: 4083 if (len_out) 4084 *len_out = len; 4085 return ret; 4086 } 4087 EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); 4088 4089 /** 4090 * kmsg_dump_rewind - reset the iterator 4091 * @iter: kmsg dump iterator 4092 * 4093 * Reset the dumper's iterator so that kmsg_dump_get_line() and 4094 * kmsg_dump_get_buffer() can be called again and used multiple 4095 * times within the same dumper.dump() callback. 4096 */ 4097 void kmsg_dump_rewind(struct kmsg_dump_iter *iter) 4098 { 4099 iter->cur_seq = latched_seq_read_nolock(&clear_seq); 4100 iter->next_seq = prb_next_seq(prb); 4101 } 4102 EXPORT_SYMBOL_GPL(kmsg_dump_rewind); 4103 4104 #endif 4105 4106 #ifdef CONFIG_SMP 4107 static atomic_t printk_cpu_sync_owner = ATOMIC_INIT(-1); 4108 static atomic_t printk_cpu_sync_nested = ATOMIC_INIT(0); 4109 4110 /** 4111 * __printk_cpu_sync_wait() - Busy wait until the printk cpu-reentrant 4112 * spinning lock is not owned by any CPU. 4113 * 4114 * Context: Any context. 4115 */ 4116 void __printk_cpu_sync_wait(void) 4117 { 4118 do { 4119 cpu_relax(); 4120 } while (atomic_read(&printk_cpu_sync_owner) != -1); 4121 } 4122 EXPORT_SYMBOL(__printk_cpu_sync_wait); 4123 4124 /** 4125 * __printk_cpu_sync_try_get() - Try to acquire the printk cpu-reentrant 4126 * spinning lock. 4127 * 4128 * If no processor has the lock, the calling processor takes the lock and 4129 * becomes the owner. If the calling processor is already the owner of the 4130 * lock, this function succeeds immediately. 4131 * 4132 * Context: Any context. Expects interrupts to be disabled. 4133 * Return: 1 on success, otherwise 0. 4134 */ 4135 int __printk_cpu_sync_try_get(void) 4136 { 4137 int cpu; 4138 int old; 4139 4140 cpu = smp_processor_id(); 4141 4142 /* 4143 * Guarantee loads and stores from this CPU when it is the lock owner 4144 * are _not_ visible to the previous lock owner. This pairs with 4145 * __printk_cpu_sync_put:B. 4146 * 4147 * Memory barrier involvement: 4148 * 4149 * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B, 4150 * then __printk_cpu_sync_put:A can never read from 4151 * __printk_cpu_sync_try_get:B. 4152 * 4153 * Relies on: 4154 * 4155 * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B 4156 * of the previous CPU 4157 * matching 4158 * ACQUIRE from __printk_cpu_sync_try_get:A to 4159 * __printk_cpu_sync_try_get:B of this CPU 4160 */ 4161 old = atomic_cmpxchg_acquire(&printk_cpu_sync_owner, -1, 4162 cpu); /* LMM(__printk_cpu_sync_try_get:A) */ 4163 if (old == -1) { 4164 /* 4165 * This CPU is now the owner and begins loading/storing 4166 * data: LMM(__printk_cpu_sync_try_get:B) 4167 */ 4168 return 1; 4169 4170 } else if (old == cpu) { 4171 /* This CPU is already the owner. */ 4172 atomic_inc(&printk_cpu_sync_nested); 4173 return 1; 4174 } 4175 4176 return 0; 4177 } 4178 EXPORT_SYMBOL(__printk_cpu_sync_try_get); 4179 4180 /** 4181 * __printk_cpu_sync_put() - Release the printk cpu-reentrant spinning lock. 4182 * 4183 * The calling processor must be the owner of the lock. 4184 * 4185 * Context: Any context. Expects interrupts to be disabled. 
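 *
 * Callers normally go through the printk_cpu_sync_get_irqsave() /
 * printk_cpu_sync_put_irqrestore() wrappers in <linux/printk.h> rather
 * than calling this directly. Illustrative usage sketch (flags is a
 * hypothetical local):
 *
 *	unsigned long flags;
 *
 *	printk_cpu_sync_get_irqsave(flags);
 *	... emit output that must not interleave with other CPUs ...
 *	printk_cpu_sync_put_irqrestore(flags);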
4186 */ 4187 void __printk_cpu_sync_put(void) 4188 { 4189 if (atomic_read(&printk_cpu_sync_nested)) { 4190 atomic_dec(&printk_cpu_sync_nested); 4191 return; 4192 } 4193 4194 /* 4195 * This CPU is finished loading/storing data: 4196 * LMM(__printk_cpu_sync_put:A) 4197 */ 4198 4199 /* 4200 * Guarantee loads and stores from this CPU when it was the 4201 * lock owner are visible to the next lock owner. This pairs 4202 * with __printk_cpu_sync_try_get:A. 4203 * 4204 * Memory barrier involvement: 4205 * 4206 * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B, 4207 * then __printk_cpu_sync_try_get:B reads from __printk_cpu_sync_put:A. 4208 * 4209 * Relies on: 4210 * 4211 * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B 4212 * of this CPU 4213 * matching 4214 * ACQUIRE from __printk_cpu_sync_try_get:A to 4215 * __printk_cpu_sync_try_get:B of the next CPU 4216 */ 4217 atomic_set_release(&printk_cpu_sync_owner, 4218 -1); /* LMM(__printk_cpu_sync_put:B) */ 4219 } 4220 EXPORT_SYMBOL(__printk_cpu_sync_put); 4221 #endif /* CONFIG_SMP */ 4222
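/*
 * Note on re-entrancy: if the owning CPU calls __printk_cpu_sync_try_get()
 * again, for example from an NMI that also needs to print, the "old == cpu"
 * branch only increments printk_cpu_sync_nested. The matching
 * __printk_cpu_sync_put() then merely decrements that counter; the owner
 * slot is released, with RELEASE ordering, only by the outermost put. The
 * lock is therefore re-entrant on the owning CPU while still excluding all
 * other CPUs.
 */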