1 /* 2 * Generic ring buffer 3 * 4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> 5 */ 6 #include <linux/ring_buffer.h> 7 #include <linux/trace_clock.h> 8 #include <linux/ftrace_irq.h> 9 #include <linux/spinlock.h> 10 #include <linux/debugfs.h> 11 #include <linux/uaccess.h> 12 #include <linux/hardirq.h> 13 #include <linux/module.h> 14 #include <linux/percpu.h> 15 #include <linux/mutex.h> 16 #include <linux/init.h> 17 #include <linux/hash.h> 18 #include <linux/list.h> 19 #include <linux/cpu.h> 20 #include <linux/fs.h> 21 22 #include "trace.h" 23 24 /* 25 * The ring buffer is made up of a list of pages. A separate list of pages is 26 * allocated for each CPU. A writer may only write to a buffer that is 27 * associated with the CPU it is currently executing on. A reader may read 28 * from any per cpu buffer. 29 * 30 * The reader is special. For each per cpu buffer, the reader has its own 31 * reader page. When a reader has read the entire reader page, this reader 32 * page is swapped with another page in the ring buffer. 33 * 34 * Now, as long as the writer is off the reader page, the reader can do what 35 * ever it wants with that page. The writer will never write to that page 36 * again (as long as it is out of the ring buffer). 37 * 38 * Here's some silly ASCII art. 
 *
 *   +------+
 *   |reader|          RING BUFFER
 *   |page  |
 *   +------+        +---+   +---+   +---+
 *                   |   |-->|   |-->|   |
 *                   +---+   +---+   +---+
 *                     ^               |
 *                     |               |
 *                     +---------------+
 *
 *
 *   +------+
 *   |reader|          RING BUFFER
 *   |page  |------------------v
 *   +------+        +---+   +---+   +---+
 *                   |   |-->|   |-->|   |
 *                   +---+   +---+   +---+
 *                     ^               |
 *                     |               |
 *                     +---------------+
 *
 *
 *   +------+
 *   |reader|          RING BUFFER
 *   |page  |------------------v
 *   +------+        +---+   +---+   +---+
 *      ^            |   |-->|   |-->|   |
 *      |            +---+   +---+   +---+
 *      |                              |
 *      |                              |
 *      +------------------------------+
 *
 *
 *   +------+
 *   |buffer|          RING BUFFER
 *   |page  |------------------v
 *   +------+        +---+   +---+   +---+
 *      ^            |   |   |   |-->|   |
 *      |   New      +---+   +---+   +---+
 *      |  Reader------^               |
 *      |   page                       |
 *      +------------------------------+
 *
 *
 * After we make this swap, the reader can hand this page off to the splice
 * code and be done with it. It can even allocate a new page if it needs to
 * and swap that into the ring buffer.
 *
 * We will be using cmpxchg soon to make all this lockless.
 *
 */

/*
 * A fast way to enable or disable all ring buffers is to
 * call tracing_on or tracing_off. Turning off the ring buffers
 * prevents all ring buffers from being recorded to.
 * Turning this switch on, makes it OK to write to the
 * ring buffer, if the ring buffer is enabled itself.
 *
 * There are three layers that must be on in order to write
 * to the ring buffer.
 *
 * 1) This global flag must be set.
 * 2) The ring buffer must be enabled for recording.
 * 3) The per cpu buffer must be enabled for recording.
 *
 * In case of an anomaly, this global flag has a bit set that
 * will permanently disable all ring buffers.
108 */ 109 110 /* 111 * Global flag to disable all recording to ring buffers 112 * This has two bits: ON, DISABLED 113 * 114 * ON DISABLED 115 * ---- ---------- 116 * 0 0 : ring buffers are off 117 * 1 0 : ring buffers are on 118 * X 1 : ring buffers are permanently disabled 119 */ 120 121 enum { 122 RB_BUFFERS_ON_BIT = 0, 123 RB_BUFFERS_DISABLED_BIT = 1, 124 }; 125 126 enum { 127 RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT, 128 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, 129 }; 130 131 static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; 132 133 #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) 134 135 /** 136 * tracing_on - enable all tracing buffers 137 * 138 * This function enables all tracing buffers that may have been 139 * disabled with tracing_off. 140 */ 141 void tracing_on(void) 142 { 143 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags); 144 } 145 EXPORT_SYMBOL_GPL(tracing_on); 146 147 /** 148 * tracing_off - turn off all tracing buffers 149 * 150 * This function stops all tracing buffers from recording data. 151 * It does not disable any overhead the tracers themselves may 152 * be causing. This function simply causes all recording to 153 * the ring buffers to fail. 154 */ 155 void tracing_off(void) 156 { 157 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags); 158 } 159 EXPORT_SYMBOL_GPL(tracing_off); 160 161 /** 162 * tracing_off_permanent - permanently disable ring buffers 163 * 164 * This function, once called, will disable all ring buffers 165 * permanently. 
166 */ 167 void tracing_off_permanent(void) 168 { 169 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); 170 } 171 172 /** 173 * tracing_is_on - show state of ring buffers enabled 174 */ 175 int tracing_is_on(void) 176 { 177 return ring_buffer_flags == RB_BUFFERS_ON; 178 } 179 EXPORT_SYMBOL_GPL(tracing_is_on); 180 181 #include "trace.h" 182 183 #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) 184 #define RB_ALIGNMENT 4U 185 #define RB_MAX_SMALL_DATA 28 186 187 enum { 188 RB_LEN_TIME_EXTEND = 8, 189 RB_LEN_TIME_STAMP = 16, 190 }; 191 192 static inline int rb_null_event(struct ring_buffer_event *event) 193 { 194 return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0; 195 } 196 197 static inline int rb_discarded_event(struct ring_buffer_event *event) 198 { 199 return event->type == RINGBUF_TYPE_PADDING && event->time_delta; 200 } 201 202 static void rb_event_set_padding(struct ring_buffer_event *event) 203 { 204 event->type = RINGBUF_TYPE_PADDING; 205 event->time_delta = 0; 206 } 207 208 /** 209 * ring_buffer_event_discard - discard an event in the ring buffer 210 * @buffer: the ring buffer 211 * @event: the event to discard 212 * 213 * Sometimes a event that is in the ring buffer needs to be ignored. 214 * This function lets the user discard an event in the ring buffer 215 * and then that event will not be read later. 216 * 217 * Note, it is up to the user to be careful with this, and protect 218 * against races. If the user discards an event that has been consumed 219 * it is possible that it could corrupt the ring buffer. 
220 */ 221 void ring_buffer_event_discard(struct ring_buffer_event *event) 222 { 223 event->type = RINGBUF_TYPE_PADDING; 224 /* time delta must be non zero */ 225 if (!event->time_delta) 226 event->time_delta = 1; 227 } 228 229 static unsigned 230 rb_event_data_length(struct ring_buffer_event *event) 231 { 232 unsigned length; 233 234 if (event->len) 235 length = event->len * RB_ALIGNMENT; 236 else 237 length = event->array[0]; 238 return length + RB_EVNT_HDR_SIZE; 239 } 240 241 /* inline for ring buffer fast paths */ 242 static unsigned 243 rb_event_length(struct ring_buffer_event *event) 244 { 245 switch (event->type) { 246 case RINGBUF_TYPE_PADDING: 247 if (rb_null_event(event)) 248 /* undefined */ 249 return -1; 250 return rb_event_data_length(event); 251 252 case RINGBUF_TYPE_TIME_EXTEND: 253 return RB_LEN_TIME_EXTEND; 254 255 case RINGBUF_TYPE_TIME_STAMP: 256 return RB_LEN_TIME_STAMP; 257 258 case RINGBUF_TYPE_DATA: 259 return rb_event_data_length(event); 260 default: 261 BUG(); 262 } 263 /* not hit */ 264 return 0; 265 } 266 267 /** 268 * ring_buffer_event_length - return the length of the event 269 * @event: the event to get the length of 270 */ 271 unsigned ring_buffer_event_length(struct ring_buffer_event *event) 272 { 273 unsigned length = rb_event_length(event); 274 if (event->type != RINGBUF_TYPE_DATA) 275 return length; 276 length -= RB_EVNT_HDR_SIZE; 277 if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) 278 length -= sizeof(event->array[0]); 279 return length; 280 } 281 EXPORT_SYMBOL_GPL(ring_buffer_event_length); 282 283 /* inline for ring buffer fast paths */ 284 static void * 285 rb_event_data(struct ring_buffer_event *event) 286 { 287 BUG_ON(event->type != RINGBUF_TYPE_DATA); 288 /* If length is in len field, then array[0] has the data */ 289 if (event->len) 290 return (void *)&event->array[0]; 291 /* Otherwise length is in array[0] and array[1] has the data */ 292 return (void *)&event->array[1]; 293 } 294 295 /** 296 * 
ring_buffer_event_data - return the data of the event 297 * @event: the event to get the data from 298 */ 299 void *ring_buffer_event_data(struct ring_buffer_event *event) 300 { 301 return rb_event_data(event); 302 } 303 EXPORT_SYMBOL_GPL(ring_buffer_event_data); 304 305 #define for_each_buffer_cpu(buffer, cpu) \ 306 for_each_cpu(cpu, buffer->cpumask) 307 308 #define TS_SHIFT 27 309 #define TS_MASK ((1ULL << TS_SHIFT) - 1) 310 #define TS_DELTA_TEST (~TS_MASK) 311 312 struct buffer_data_page { 313 u64 time_stamp; /* page time stamp */ 314 local_t commit; /* write committed index */ 315 unsigned char data[]; /* data of buffer page */ 316 }; 317 318 struct buffer_page { 319 local_t write; /* index for next write */ 320 unsigned read; /* index for next read */ 321 struct list_head list; /* list of free pages */ 322 struct buffer_data_page *page; /* Actual data page */ 323 }; 324 325 static void rb_init_page(struct buffer_data_page *bpage) 326 { 327 local_set(&bpage->commit, 0); 328 } 329 330 /** 331 * ring_buffer_page_len - the size of data on the page. 332 * @page: The page to read 333 * 334 * Returns the amount of data on the page, including buffer page header. 335 */ 336 size_t ring_buffer_page_len(void *page) 337 { 338 return local_read(&((struct buffer_data_page *)page)->commit) 339 + BUF_PAGE_HDR_SIZE; 340 } 341 342 /* 343 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 344 * this issue out. 345 */ 346 static void free_buffer_page(struct buffer_page *bpage) 347 { 348 free_page((unsigned long)bpage->page); 349 kfree(bpage); 350 } 351 352 /* 353 * We need to fit the time_stamp delta into 27 bits. 354 */ 355 static inline int test_time_stamp(u64 delta) 356 { 357 if (delta & TS_DELTA_TEST) 358 return 1; 359 return 0; 360 } 361 362 #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) 363 364 /* 365 * head_page == tail_page && head == tail then buffer is empty. 
366 */ 367 struct ring_buffer_per_cpu { 368 int cpu; 369 struct ring_buffer *buffer; 370 spinlock_t reader_lock; /* serialize readers */ 371 raw_spinlock_t lock; 372 struct lock_class_key lock_key; 373 struct list_head pages; 374 struct buffer_page *head_page; /* read from head */ 375 struct buffer_page *tail_page; /* write to tail */ 376 struct buffer_page *commit_page; /* committed pages */ 377 struct buffer_page *reader_page; 378 unsigned long overrun; 379 unsigned long entries; 380 u64 write_stamp; 381 u64 read_stamp; 382 atomic_t record_disabled; 383 }; 384 385 struct ring_buffer { 386 unsigned pages; 387 unsigned flags; 388 int cpus; 389 atomic_t record_disabled; 390 cpumask_var_t cpumask; 391 392 struct mutex mutex; 393 394 struct ring_buffer_per_cpu **buffers; 395 396 #ifdef CONFIG_HOTPLUG_CPU 397 struct notifier_block cpu_notify; 398 #endif 399 u64 (*clock)(void); 400 }; 401 402 struct ring_buffer_iter { 403 struct ring_buffer_per_cpu *cpu_buffer; 404 unsigned long head; 405 struct buffer_page *head_page; 406 u64 read_stamp; 407 }; 408 409 /* buffer may be either ring_buffer or ring_buffer_per_cpu */ 410 #define RB_WARN_ON(buffer, cond) \ 411 ({ \ 412 int _____ret = unlikely(cond); \ 413 if (_____ret) { \ 414 atomic_inc(&buffer->record_disabled); \ 415 WARN_ON(1); \ 416 } \ 417 _____ret; \ 418 }) 419 420 /* Up this if you want to test the TIME_EXTENTS and normalization */ 421 #define DEBUG_SHIFT 0 422 423 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) 424 { 425 u64 time; 426 427 preempt_disable_notrace(); 428 /* shift to debug/test normalization and TIME_EXTENTS */ 429 time = buffer->clock() << DEBUG_SHIFT; 430 preempt_enable_no_resched_notrace(); 431 432 return time; 433 } 434 EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); 435 436 void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, 437 int cpu, u64 *ts) 438 { 439 /* Just stupid testing the normalize function and deltas */ 440 *ts >>= DEBUG_SHIFT; 441 } 442 
EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); 443 444 /** 445 * check_pages - integrity check of buffer pages 446 * @cpu_buffer: CPU buffer with pages to test 447 * 448 * As a safety measure we check to make sure the data pages have not 449 * been corrupted. 450 */ 451 static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 452 { 453 struct list_head *head = &cpu_buffer->pages; 454 struct buffer_page *bpage, *tmp; 455 456 if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) 457 return -1; 458 if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) 459 return -1; 460 461 list_for_each_entry_safe(bpage, tmp, head, list) { 462 if (RB_WARN_ON(cpu_buffer, 463 bpage->list.next->prev != &bpage->list)) 464 return -1; 465 if (RB_WARN_ON(cpu_buffer, 466 bpage->list.prev->next != &bpage->list)) 467 return -1; 468 } 469 470 return 0; 471 } 472 473 static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, 474 unsigned nr_pages) 475 { 476 struct list_head *head = &cpu_buffer->pages; 477 struct buffer_page *bpage, *tmp; 478 unsigned long addr; 479 LIST_HEAD(pages); 480 unsigned i; 481 482 for (i = 0; i < nr_pages; i++) { 483 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 484 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 485 if (!bpage) 486 goto free_pages; 487 list_add(&bpage->list, &pages); 488 489 addr = __get_free_page(GFP_KERNEL); 490 if (!addr) 491 goto free_pages; 492 bpage->page = (void *)addr; 493 rb_init_page(bpage->page); 494 } 495 496 list_splice(&pages, head); 497 498 rb_check_pages(cpu_buffer); 499 500 return 0; 501 502 free_pages: 503 list_for_each_entry_safe(bpage, tmp, &pages, list) { 504 list_del_init(&bpage->list); 505 free_buffer_page(bpage); 506 } 507 return -ENOMEM; 508 } 509 510 static struct ring_buffer_per_cpu * 511 rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) 512 { 513 struct ring_buffer_per_cpu *cpu_buffer; 514 struct buffer_page *bpage; 515 unsigned long addr; 516 int ret; 517 518 cpu_buffer = 
kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), 519 GFP_KERNEL, cpu_to_node(cpu)); 520 if (!cpu_buffer) 521 return NULL; 522 523 cpu_buffer->cpu = cpu; 524 cpu_buffer->buffer = buffer; 525 spin_lock_init(&cpu_buffer->reader_lock); 526 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 527 INIT_LIST_HEAD(&cpu_buffer->pages); 528 529 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 530 GFP_KERNEL, cpu_to_node(cpu)); 531 if (!bpage) 532 goto fail_free_buffer; 533 534 cpu_buffer->reader_page = bpage; 535 addr = __get_free_page(GFP_KERNEL); 536 if (!addr) 537 goto fail_free_reader; 538 bpage->page = (void *)addr; 539 rb_init_page(bpage->page); 540 541 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 542 543 ret = rb_allocate_pages(cpu_buffer, buffer->pages); 544 if (ret < 0) 545 goto fail_free_reader; 546 547 cpu_buffer->head_page 548 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 549 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; 550 551 return cpu_buffer; 552 553 fail_free_reader: 554 free_buffer_page(cpu_buffer->reader_page); 555 556 fail_free_buffer: 557 kfree(cpu_buffer); 558 return NULL; 559 } 560 561 static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 562 { 563 struct list_head *head = &cpu_buffer->pages; 564 struct buffer_page *bpage, *tmp; 565 566 free_buffer_page(cpu_buffer->reader_page); 567 568 list_for_each_entry_safe(bpage, tmp, head, list) { 569 list_del_init(&bpage->list); 570 free_buffer_page(bpage); 571 } 572 kfree(cpu_buffer); 573 } 574 575 /* 576 * Causes compile errors if the struct buffer_page gets bigger 577 * than the struct page. 578 */ 579 extern int ring_buffer_page_too_big(void); 580 581 #ifdef CONFIG_HOTPLUG_CPU 582 static int rb_cpu_notify(struct notifier_block *self, 583 unsigned long action, void *hcpu); 584 #endif 585 586 /** 587 * ring_buffer_alloc - allocate a new ring_buffer 588 * @size: the size in bytes per cpu that is needed. 
589 * @flags: attributes to set for the ring buffer. 590 * 591 * Currently the only flag that is available is the RB_FL_OVERWRITE 592 * flag. This flag means that the buffer will overwrite old data 593 * when the buffer wraps. If this flag is not set, the buffer will 594 * drop data when the tail hits the head. 595 */ 596 struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) 597 { 598 struct ring_buffer *buffer; 599 int bsize; 600 int cpu; 601 602 /* Paranoid! Optimizes out when all is well */ 603 if (sizeof(struct buffer_page) > sizeof(struct page)) 604 ring_buffer_page_too_big(); 605 606 607 /* keep it in its own cache line */ 608 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), 609 GFP_KERNEL); 610 if (!buffer) 611 return NULL; 612 613 if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) 614 goto fail_free_buffer; 615 616 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 617 buffer->flags = flags; 618 buffer->clock = trace_clock_local; 619 620 /* need at least two pages */ 621 if (buffer->pages == 1) 622 buffer->pages++; 623 624 /* 625 * In case of non-hotplug cpu, if the ring-buffer is allocated 626 * in early initcall, it will not be notified of secondary cpus. 627 * In that off case, we need to allocate for all possible cpus. 
628 */ 629 #ifdef CONFIG_HOTPLUG_CPU 630 get_online_cpus(); 631 cpumask_copy(buffer->cpumask, cpu_online_mask); 632 #else 633 cpumask_copy(buffer->cpumask, cpu_possible_mask); 634 #endif 635 buffer->cpus = nr_cpu_ids; 636 637 bsize = sizeof(void *) * nr_cpu_ids; 638 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()), 639 GFP_KERNEL); 640 if (!buffer->buffers) 641 goto fail_free_cpumask; 642 643 for_each_buffer_cpu(buffer, cpu) { 644 buffer->buffers[cpu] = 645 rb_allocate_cpu_buffer(buffer, cpu); 646 if (!buffer->buffers[cpu]) 647 goto fail_free_buffers; 648 } 649 650 #ifdef CONFIG_HOTPLUG_CPU 651 buffer->cpu_notify.notifier_call = rb_cpu_notify; 652 buffer->cpu_notify.priority = 0; 653 register_cpu_notifier(&buffer->cpu_notify); 654 #endif 655 656 put_online_cpus(); 657 mutex_init(&buffer->mutex); 658 659 return buffer; 660 661 fail_free_buffers: 662 for_each_buffer_cpu(buffer, cpu) { 663 if (buffer->buffers[cpu]) 664 rb_free_cpu_buffer(buffer->buffers[cpu]); 665 } 666 kfree(buffer->buffers); 667 668 fail_free_cpumask: 669 free_cpumask_var(buffer->cpumask); 670 put_online_cpus(); 671 672 fail_free_buffer: 673 kfree(buffer); 674 return NULL; 675 } 676 EXPORT_SYMBOL_GPL(ring_buffer_alloc); 677 678 /** 679 * ring_buffer_free - free a ring buffer. 680 * @buffer: the buffer to free. 
681 */ 682 void 683 ring_buffer_free(struct ring_buffer *buffer) 684 { 685 int cpu; 686 687 get_online_cpus(); 688 689 #ifdef CONFIG_HOTPLUG_CPU 690 unregister_cpu_notifier(&buffer->cpu_notify); 691 #endif 692 693 for_each_buffer_cpu(buffer, cpu) 694 rb_free_cpu_buffer(buffer->buffers[cpu]); 695 696 put_online_cpus(); 697 698 free_cpumask_var(buffer->cpumask); 699 700 kfree(buffer); 701 } 702 EXPORT_SYMBOL_GPL(ring_buffer_free); 703 704 void ring_buffer_set_clock(struct ring_buffer *buffer, 705 u64 (*clock)(void)) 706 { 707 buffer->clock = clock; 708 } 709 710 static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); 711 712 static void 713 rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) 714 { 715 struct buffer_page *bpage; 716 struct list_head *p; 717 unsigned i; 718 719 atomic_inc(&cpu_buffer->record_disabled); 720 synchronize_sched(); 721 722 for (i = 0; i < nr_pages; i++) { 723 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) 724 return; 725 p = cpu_buffer->pages.next; 726 bpage = list_entry(p, struct buffer_page, list); 727 list_del_init(&bpage->list); 728 free_buffer_page(bpage); 729 } 730 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) 731 return; 732 733 rb_reset_cpu(cpu_buffer); 734 735 rb_check_pages(cpu_buffer); 736 737 atomic_dec(&cpu_buffer->record_disabled); 738 739 } 740 741 static void 742 rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, 743 struct list_head *pages, unsigned nr_pages) 744 { 745 struct buffer_page *bpage; 746 struct list_head *p; 747 unsigned i; 748 749 atomic_inc(&cpu_buffer->record_disabled); 750 synchronize_sched(); 751 752 for (i = 0; i < nr_pages; i++) { 753 if (RB_WARN_ON(cpu_buffer, list_empty(pages))) 754 return; 755 p = pages->next; 756 bpage = list_entry(p, struct buffer_page, list); 757 list_del_init(&bpage->list); 758 list_add_tail(&bpage->list, &cpu_buffer->pages); 759 } 760 rb_reset_cpu(cpu_buffer); 761 762 rb_check_pages(cpu_buffer); 763 764 
atomic_dec(&cpu_buffer->record_disabled); 765 } 766 767 /** 768 * ring_buffer_resize - resize the ring buffer 769 * @buffer: the buffer to resize. 770 * @size: the new size. 771 * 772 * The tracer is responsible for making sure that the buffer is 773 * not being used while changing the size. 774 * Note: We may be able to change the above requirement by using 775 * RCU synchronizations. 776 * 777 * Minimum size is 2 * BUF_PAGE_SIZE. 778 * 779 * Returns -1 on failure. 780 */ 781 int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) 782 { 783 struct ring_buffer_per_cpu *cpu_buffer; 784 unsigned nr_pages, rm_pages, new_pages; 785 struct buffer_page *bpage, *tmp; 786 unsigned long buffer_size; 787 unsigned long addr; 788 LIST_HEAD(pages); 789 int i, cpu; 790 791 /* 792 * Always succeed at resizing a non-existent buffer: 793 */ 794 if (!buffer) 795 return size; 796 797 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 798 size *= BUF_PAGE_SIZE; 799 buffer_size = buffer->pages * BUF_PAGE_SIZE; 800 801 /* we need a minimum of two pages */ 802 if (size < BUF_PAGE_SIZE * 2) 803 size = BUF_PAGE_SIZE * 2; 804 805 if (size == buffer_size) 806 return size; 807 808 mutex_lock(&buffer->mutex); 809 get_online_cpus(); 810 811 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 812 813 if (size < buffer_size) { 814 815 /* easy case, just free pages */ 816 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) 817 goto out_fail; 818 819 rm_pages = buffer->pages - nr_pages; 820 821 for_each_buffer_cpu(buffer, cpu) { 822 cpu_buffer = buffer->buffers[cpu]; 823 rb_remove_pages(cpu_buffer, rm_pages); 824 } 825 goto out; 826 } 827 828 /* 829 * This is a bit more difficult. We only want to add pages 830 * when we can allocate enough for all CPUs. We do this 831 * by allocating all the pages and storing them on a local 832 * link list. If we succeed in our allocation, then we 833 * add these pages to the cpu_buffers. 
Otherwise we just free 834 * them all and return -ENOMEM; 835 */ 836 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) 837 goto out_fail; 838 839 new_pages = nr_pages - buffer->pages; 840 841 for_each_buffer_cpu(buffer, cpu) { 842 for (i = 0; i < new_pages; i++) { 843 bpage = kzalloc_node(ALIGN(sizeof(*bpage), 844 cache_line_size()), 845 GFP_KERNEL, cpu_to_node(cpu)); 846 if (!bpage) 847 goto free_pages; 848 list_add(&bpage->list, &pages); 849 addr = __get_free_page(GFP_KERNEL); 850 if (!addr) 851 goto free_pages; 852 bpage->page = (void *)addr; 853 rb_init_page(bpage->page); 854 } 855 } 856 857 for_each_buffer_cpu(buffer, cpu) { 858 cpu_buffer = buffer->buffers[cpu]; 859 rb_insert_pages(cpu_buffer, &pages, new_pages); 860 } 861 862 if (RB_WARN_ON(buffer, !list_empty(&pages))) 863 goto out_fail; 864 865 out: 866 buffer->pages = nr_pages; 867 put_online_cpus(); 868 mutex_unlock(&buffer->mutex); 869 870 return size; 871 872 free_pages: 873 list_for_each_entry_safe(bpage, tmp, &pages, list) { 874 list_del_init(&bpage->list); 875 free_buffer_page(bpage); 876 } 877 put_online_cpus(); 878 mutex_unlock(&buffer->mutex); 879 return -ENOMEM; 880 881 /* 882 * Something went totally wrong, and we are too paranoid 883 * to even clean up the mess. 
884 */ 885 out_fail: 886 put_online_cpus(); 887 mutex_unlock(&buffer->mutex); 888 return -1; 889 } 890 EXPORT_SYMBOL_GPL(ring_buffer_resize); 891 892 static inline void * 893 __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) 894 { 895 return bpage->data + index; 896 } 897 898 static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) 899 { 900 return bpage->page->data + index; 901 } 902 903 static inline struct ring_buffer_event * 904 rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) 905 { 906 return __rb_page_index(cpu_buffer->reader_page, 907 cpu_buffer->reader_page->read); 908 } 909 910 static inline struct ring_buffer_event * 911 rb_head_event(struct ring_buffer_per_cpu *cpu_buffer) 912 { 913 return __rb_page_index(cpu_buffer->head_page, 914 cpu_buffer->head_page->read); 915 } 916 917 static inline struct ring_buffer_event * 918 rb_iter_head_event(struct ring_buffer_iter *iter) 919 { 920 return __rb_page_index(iter->head_page, iter->head); 921 } 922 923 static inline unsigned rb_page_write(struct buffer_page *bpage) 924 { 925 return local_read(&bpage->write); 926 } 927 928 static inline unsigned rb_page_commit(struct buffer_page *bpage) 929 { 930 return local_read(&bpage->page->commit); 931 } 932 933 /* Size is determined by what has been commited */ 934 static inline unsigned rb_page_size(struct buffer_page *bpage) 935 { 936 return rb_page_commit(bpage); 937 } 938 939 static inline unsigned 940 rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) 941 { 942 return rb_page_commit(cpu_buffer->commit_page); 943 } 944 945 static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer) 946 { 947 return rb_page_commit(cpu_buffer->head_page); 948 } 949 950 /* 951 * When the tail hits the head and the buffer is in overwrite mode, 952 * the head jumps to the next page and all content on the previous 953 * page is discarded. But before doing so, we update the overrun 954 * variable of the buffer. 
 */
static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct ring_buffer_event *event;
	unsigned long head;

	/* walk the committed part of the head page, one event at a time */
	for (head = 0; head < rb_head_size(cpu_buffer);
	     head += rb_event_length(event)) {

		event = __rb_page_index(cpu_buffer->head_page, head);
		/* a null event has no usable length to step over */
		if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
			return;
		/* Only count data entries */
		if (event->type != RINGBUF_TYPE_DATA)
			continue;
		cpu_buffer->overrun++;
		cpu_buffer->entries--;
	}
}

/*
 * Advance *@bpage to the next page on @cpu_buffer's list, stepping
 * over the list head itself so the walk wraps around.
 */
static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
			       struct buffer_page **bpage)
{
	struct list_head *p = (*bpage)->list.next;

	/* cpu_buffer->pages is the list head, not a page */
	if (p == &cpu_buffer->pages)
		p = p->next;

	*bpage = list_entry(p, struct buffer_page, list);
}

/*
 * Offset of @event inside the data area of its page: the offset within
 * the page minus the size of the page header.
 */
static inline unsigned
rb_event_index(struct ring_buffer_event *event)
{
	unsigned long addr = (unsigned long)event;

	return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
}

/*
 * True when @event sits on the commit page at exactly the committed
 * index.
 */
static int
rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
	     struct ring_buffer_event *event)
{
	unsigned long addr = (unsigned long)event;
	unsigned long index;

	index = rb_event_index(event);
	/* reduce the event address to the address of its page */
	addr &= PAGE_MASK;

	return cpu_buffer->commit_page->page == (void *)addr &&
		rb_commit_index(cpu_buffer) == index;
}

/*
 * Move the commit page forward until it is the page that holds
 * @event, committing everything written on each page that is passed,
 * then set the commit index to @event's index. Warns and gives up if
 * the tail page is reached before @event's page is found.
 */
static void
rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
		    struct ring_buffer_event *event)
{
	unsigned long addr = (unsigned long)event;
	unsigned long index;

	index = rb_event_index(event);
	addr &= PAGE_MASK;

	while (cpu_buffer->commit_page->page != (void *)addr) {
		if (RB_WARN_ON(cpu_buffer,
			  cpu_buffer->commit_page == cpu_buffer->tail_page))
			return;
		/* commit all that was written on the page being left */
		cpu_buffer->commit_page->page->commit =
			cpu_buffer->commit_page->write;
		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
		cpu_buffer->write_stamp =
			cpu_buffer->commit_page->page->time_stamp;
	}

	/* Now set the commit to the event's index */
	local_set(&cpu_buffer->commit_page->page->commit, index);
}

/*
 * Bring the commit page and commit index up to the tail page and its
 * write index.
 */
static void
rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
{
	/*
	 * We only race with interrupts and NMIs on this CPU.
	 * If we own the commit event, then we can commit
	 * all others that interrupted us, since the interruptions
	 * are in stack format (they finish before they come
	 * back to us). This allows us to do a simple loop to
	 * assign the commit to the tail.
	 */
 again:
	/* first catch the commit page up to the tail page */
	while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
		cpu_buffer->commit_page->page->commit =
			cpu_buffer->commit_page->write;
		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
		cpu_buffer->write_stamp =
			cpu_buffer->commit_page->page->time_stamp;
		/* add barrier to keep gcc from optimizing too much */
		barrier();
	}
	/* then catch the commit index up to the write index */
	while (rb_commit_index(cpu_buffer) !=
	       rb_page_write(cpu_buffer->commit_page)) {
		cpu_buffer->commit_page->page->commit =
			cpu_buffer->commit_page->write;
		barrier();
	}

	/* again, keep gcc from optimizing */
	barrier();

	/*
	 * If an interrupt came in just after the first while loop
	 * and pushed the tail page forward, we will be left with
	 * a dangling commit that will never go forward.
	 */
	if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
		goto again;
}

/*
 * Rewind the reader page: the read index goes back to zero and the
 * read stamp is taken from the page's time stamp.
 */
static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{
	cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
	cpu_buffer->reader_page->read = 0;
}

/*
 * Move @iter to the start of its next page and pick up that page's
 * time stamp.
 */
static void rb_inc_iter(struct ring_buffer_iter *iter)
{
	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;

	/*
	 * The iterator could be on the reader page (it starts there).
	 * But the head could have moved, since the reader was
	 * found. Check for this case and assign the iterator
	 * to the head page instead of next.
	 */
	if (iter->head_page == cpu_buffer->reader_page)
		iter->head_page = cpu_buffer->head_page;
	else
		rb_inc_page(cpu_buffer, &iter->head_page);

	iter->read_stamp = iter->head_page->page->time_stamp;
	iter->head = 0;
}

/**
 * rb_update_event - update event type and data
 * @event: the event to update
 * @type: the type of event
 * @length: the size of the event field in the ring buffer
 *
 * Update the type and data fields of the event. The length
 * is the actual size that is written to the ring buffer,
 * and with this, we can determine what to place into the
 * data field.
 */
static void
rb_update_event(struct ring_buffer_event *event,
			 unsigned type, unsigned length)
{
	event->type = type;

	switch (type) {

	case RINGBUF_TYPE_PADDING:
		break;

	case RINGBUF_TYPE_TIME_EXTEND:
		event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
		break;

	case RINGBUF_TYPE_TIME_STAMP:
		event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
		break;

	case RINGBUF_TYPE_DATA:
		/* from here on @length is the payload size only */
		length -= RB_EVNT_HDR_SIZE;
		if (length > RB_MAX_SMALL_DATA) {
			/* too big for the len field: keep it in array[0] */
			event->len = 0;
			event->array[0] = length;
		} else
			event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
		break;
	default:
		BUG();
	}
}

/*
 * Turn a payload size into the number of bytes to reserve in the
 * buffer: room for the array[0] length word when the payload is larger
 * than RB_MAX_SMALL_DATA, plus the event header, rounded up to
 * RB_ALIGNMENT.
 */
static unsigned rb_calculate_event_length(unsigned length)
{
	struct ring_buffer_event event; /* Used only for sizeof array */

	/* zero length can cause confusions */
	if (!length)
		length = 1;

	if (length > RB_MAX_SMALL_DATA)
		length += sizeof(event.array[0]);

	length += RB_EVNT_HDR_SIZE;
	length = ALIGN(length, RB_ALIGNMENT);

	return length;
}

/*
 * Reserve @length bytes for an event of @type on the current tail page.
 *
 * Returns the reserved event on success. If the reservation ran past the
 * end of the tail page, the tail is moved on (under cpu_buffer->lock with
 * interrupts off) and ERR_PTR(-EAGAIN) is returned so the caller retries.
 * Returns NULL when the reservation has to be given up: the lock could
 * not be taken from NMI context, the next page is the reader or commit
 * page, or the next page is the head page and the buffer is not in
 * overwrite mode.
 *
 * @ts may be rewritten with a fresh time stamp when the tail page moves.
 */
static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
		  unsigned type, unsigned long length, u64 *ts)
{
	struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
	unsigned long tail, write;
	struct ring_buffer *buffer = cpu_buffer->buffer;
	struct ring_buffer_event *event;
	unsigned long flags;
	bool lock_taken = false;

	commit_page = cpu_buffer->commit_page;
	/* we just need to protect against interrupts */
	barrier();
	tail_page = cpu_buffer->tail_page;
	/* claim the space first; "tail" is where our event would start */
	write = local_add_return(length, &tail_page->write);
	tail = write - length;

	/* See if we shot past the end of this buffer page */
	if (write > BUF_PAGE_SIZE) {
		struct buffer_page *next_page = tail_page;

		local_irq_save(flags);
		/*
		 * Since the write to the buffer is still not
		 * fully lockless, we must be careful with NMIs.
		 * The locks in the writers are taken when a write
		 * crosses to a new page. The locks protect against
		 * races with the readers (this will soon be fixed
		 * with a lockless solution).
		 *
		 * Because we can not protect against NMIs, and we
		 * want to keep traces reentrant, we need to manage
		 * what happens when we are in an NMI.
		 *
		 * NMIs can happen after we take the lock.
		 * If we are in an NMI, only take the lock
		 * if it is not already taken. Otherwise
		 * simply fail.
		 */
		if (unlikely(in_nmi())) {
			if (!__raw_spin_trylock(&cpu_buffer->lock))
				goto out_reset;
		} else
			__raw_spin_lock(&cpu_buffer->lock);

		lock_taken = true;

		rb_inc_page(cpu_buffer, &next_page);

		head_page = cpu_buffer->head_page;
		reader_page = cpu_buffer->reader_page;

		/* we grabbed the lock before incrementing */
		if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
			goto out_reset;

		/*
		 * If for some reason, we had an interrupt storm that made
		 * it all the way around the buffer, bail, and warn
		 * about it.
		 */
		if (unlikely(next_page == commit_page)) {
			WARN_ON_ONCE(1);
			goto out_reset;
		}

		if (next_page == head_page) {
			/* buffer is full: only overwrite mode may push the head */
			if (!(buffer->flags & RB_FL_OVERWRITE))
				goto out_reset;

			/* tail_page has not moved yet? */
			if (tail_page == cpu_buffer->tail_page) {
				/* count overflows */
				rb_update_overflow(cpu_buffer);

				rb_inc_page(cpu_buffer, &head_page);
				cpu_buffer->head_page = head_page;
				cpu_buffer->head_page->read = 0;
			}
		}

		/*
		 * If the tail page is still the same as what we think
		 * it is, then it is up to us to update the tail
		 * pointer.
		 */
		if (tail_page == cpu_buffer->tail_page) {
			local_set(&next_page->write, 0);
			local_set(&next_page->page->commit, 0);
			cpu_buffer->tail_page = next_page;

			/* reread the time stamp */
			*ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu);
			cpu_buffer->tail_page->page->time_stamp = *ts;
		}

		/*
		 * The actual tail page has moved forward.
		 */
		if (tail < BUF_PAGE_SIZE) {
			/* Mark the rest of the page with padding */
			event = __rb_page_index(tail_page, tail);
			rb_event_set_padding(event);
		}

		if (tail <= BUF_PAGE_SIZE)
			/* Set the write back to the previous setting */
			local_set(&tail_page->write, tail);

		/*
		 * If this was a commit entry that failed,
		 * increment that too
		 */
		if (tail_page == cpu_buffer->commit_page &&
		    tail == rb_commit_index(cpu_buffer)) {
			rb_set_commit_to_write(cpu_buffer);
		}

		__raw_spin_unlock(&cpu_buffer->lock);
		local_irq_restore(flags);

		/* fail and let the caller try again */
		return ERR_PTR(-EAGAIN);
	}

	/* We reserved something on the buffer */

	if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
		return NULL;

	event = __rb_page_index(tail_page, tail);
	rb_update_event(event, type, length);

	/*
	 * If this is a commit and the tail is zero, then update
	 * this page's time stamp.
	 */
	if (!tail && rb_is_commit(cpu_buffer, event))
		cpu_buffer->commit_page->page->time_stamp = *ts;

	return event;

 out_reset:
	/* reset write */
	if (tail <= BUF_PAGE_SIZE)
		local_set(&tail_page->write, tail);

	/* the NMI trylock failure path gets here without the lock */
	if (likely(lock_taken))
		__raw_spin_unlock(&cpu_buffer->lock);
	local_irq_restore(flags);
	return NULL;
}

/*
 * Reserve a TIME_EXTEND event carrying *delta.
 *
 * Returns -EBUSY if the reservation failed, -EAGAIN if the caller should
 * retry, 1 if the time extend event is the commit (write_stamp was
 * updated from *ts), and 0 if it is not the commit and was zero-filled.
 * On the 0 and 1 returns *delta is cleared.
 */
static int
rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
		  u64 *ts, u64 *delta)
{
	struct ring_buffer_event *event;
	static int once;
	int ret;

	/* warn a single time about an absurdly large delta */
	if (unlikely(*delta > (1ULL << 59) && !once++)) {
		printk(KERN_WARNING "Delta way too big! %llu"
		       " ts=%llu write stamp = %llu\n",
		       (unsigned long long)*delta,
		       (unsigned long long)*ts,
		       (unsigned long long)cpu_buffer->write_stamp);
		WARN_ON(1);
	}

	/*
	 * The delta is too big, we need to add a
	 * new timestamp.
	 */
	event = __rb_reserve_next(cpu_buffer,
				  RINGBUF_TYPE_TIME_EXTEND,
				  RB_LEN_TIME_EXTEND,
				  ts);
	if (!event)
		return -EBUSY;

	if (PTR_ERR(event) == -EAGAIN)
		return -EAGAIN;

	/* Only a committed time event can update the write stamp */
	if (rb_is_commit(cpu_buffer, event)) {
		/*
		 * If this is the first on the page, then we need to
		 * update the page itself, and just put in a zero.
		 */
		if (rb_event_index(event)) {
			event->time_delta = *delta & TS_MASK;
			event->array[0] = *delta >> TS_SHIFT;
		} else {
			cpu_buffer->commit_page->page->time_stamp = *ts;
			event->time_delta = 0;
			event->array[0] = 0;
		}
		cpu_buffer->write_stamp = *ts;
		/* let the caller know this was the commit */
		ret = 1;
	} else {
		/* Darn, this is just wasted space */
		event->time_delta = 0;
		event->array[0] = 0;
		ret = 0;
	}

	*delta = 0;

	return ret;
}

/*
 * Reserve an event of @type and @length (full event length, header
 * included) on @cpu_buffer, inserting a time extend event first when the
 * delta since the last write stamp needs one. Retries while the lower
 * layer reports -EAGAIN, giving up with a warning after 1000 loops.
 *
 * Returns the reserved event with time_delta filled in, or NULL.
 */
static struct ring_buffer_event *
rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
		      unsigned type, unsigned long length)
{
	struct ring_buffer_event *event;
	u64 ts, delta;
	int commit = 0;
	int nr_loops = 0;

 again:
	/*
	 * We allow for interrupts to reenter here and do a trace.
	 * If one does, it will cause this original code to loop
	 * back here. Even with heavy interrupts happening, this
	 * should only happen a few times in a row. If this happens
	 * 1000 times in a row, there must be either an interrupt
	 * storm or we have something buggy.
	 * Bail!
	 */
	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
		return NULL;

	ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);

	/*
	 * Only the first commit can update the timestamp.
	 * Yes there is a race here. If an interrupt comes in
	 * just after the conditional and it traces too, then it
	 * will also check the deltas. More than one timestamp may
	 * also be made. But only the entry that did the actual
	 * commit will be something other than zero.
	 */
	if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
	    rb_page_write(cpu_buffer->tail_page) ==
	    rb_commit_index(cpu_buffer)) {

		delta = ts - cpu_buffer->write_stamp;

		/* make sure this delta is calculated here */
		barrier();

		/* Did the write stamp get updated already? */
		if (unlikely(ts < cpu_buffer->write_stamp))
			delta = 0;

		if (test_time_stamp(delta)) {

			commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);

			if (commit == -EBUSY)
				return NULL;

			if (commit == -EAGAIN)
				goto again;

			RB_WARN_ON(cpu_buffer, commit < 0);
		}
	} else
		/* Non commits have zero deltas */
		delta = 0;

	event = __rb_reserve_next(cpu_buffer, type, length, &ts);
	if (PTR_ERR(event) == -EAGAIN)
		goto again;

	if (!event) {
		if (unlikely(commit))
			/*
			 * Ouch! We needed a timestamp and it was committed. But
			 * we didn't get our event reserved.
			 */
			rb_set_commit_to_write(cpu_buffer);
		return NULL;
	}

	/*
	 * If the timestamp was committed, make the commit our entry
	 * now so that we will update it when needed.
	 */
	if (commit)
		rb_set_commit_event(cpu_buffer, event);
	else if (!rb_is_commit(cpu_buffer, event))
		delta = 0;

	event->time_delta = delta;

	return event;
}

/* resched state saved by the outermost reserve, restored at commit */
static DEFINE_PER_CPU(int, rb_need_resched);

/**
 * ring_buffer_lock_reserve - reserve a part of the buffer
 * @buffer: the ring buffer to reserve from
 * @length: the length of the data to reserve (excluding event header)
 *
 * Returns a reserved event on the ring buffer to copy directly to.
1469 * The user of this interface will need to get the body to write into 1470 * and can use the ring_buffer_event_data() interface. 1471 * 1472 * The length is the length of the data needed, not the event length 1473 * which also includes the event header. 1474 * 1475 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned. 1476 * If NULL is returned, then nothing has been allocated or locked. 1477 */ 1478 struct ring_buffer_event * 1479 ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) 1480 { 1481 struct ring_buffer_per_cpu *cpu_buffer; 1482 struct ring_buffer_event *event; 1483 int cpu, resched; 1484 1485 if (ring_buffer_flags != RB_BUFFERS_ON) 1486 return NULL; 1487 1488 if (atomic_read(&buffer->record_disabled)) 1489 return NULL; 1490 1491 /* If we are tracing schedule, we don't want to recurse */ 1492 resched = ftrace_preempt_disable(); 1493 1494 cpu = raw_smp_processor_id(); 1495 1496 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 1497 goto out; 1498 1499 cpu_buffer = buffer->buffers[cpu]; 1500 1501 if (atomic_read(&cpu_buffer->record_disabled)) 1502 goto out; 1503 1504 length = rb_calculate_event_length(length); 1505 if (length > BUF_PAGE_SIZE) 1506 goto out; 1507 1508 event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length); 1509 if (!event) 1510 goto out; 1511 1512 /* 1513 * Need to store resched state on this cpu. 1514 * Only the first needs to. 
1515 */ 1516 1517 if (preempt_count() == 1) 1518 per_cpu(rb_need_resched, cpu) = resched; 1519 1520 return event; 1521 1522 out: 1523 ftrace_preempt_enable(resched); 1524 return NULL; 1525 } 1526 EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); 1527 1528 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 1529 struct ring_buffer_event *event) 1530 { 1531 cpu_buffer->entries++; 1532 1533 /* Only process further if we own the commit */ 1534 if (!rb_is_commit(cpu_buffer, event)) 1535 return; 1536 1537 cpu_buffer->write_stamp += event->time_delta; 1538 1539 rb_set_commit_to_write(cpu_buffer); 1540 } 1541 1542 /** 1543 * ring_buffer_unlock_commit - commit a reserved 1544 * @buffer: The buffer to commit to 1545 * @event: The event pointer to commit. 1546 * 1547 * This commits the data to the ring buffer, and releases any locks held. 1548 * 1549 * Must be paired with ring_buffer_lock_reserve. 1550 */ 1551 int ring_buffer_unlock_commit(struct ring_buffer *buffer, 1552 struct ring_buffer_event *event) 1553 { 1554 struct ring_buffer_per_cpu *cpu_buffer; 1555 int cpu = raw_smp_processor_id(); 1556 1557 cpu_buffer = buffer->buffers[cpu]; 1558 1559 rb_commit(cpu_buffer, event); 1560 1561 /* 1562 * Only the last preempt count needs to restore preemption. 1563 */ 1564 if (preempt_count() == 1) 1565 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); 1566 else 1567 preempt_enable_no_resched_notrace(); 1568 1569 return 0; 1570 } 1571 EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); 1572 1573 /** 1574 * ring_buffer_write - write data to the buffer without reserving 1575 * @buffer: The ring buffer to write to. 1576 * @length: The length of the data being written (excluding the event header) 1577 * @data: The data to write to the buffer. 1578 * 1579 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as 1580 * one function. If you already have the data to write to the buffer, it 1581 * may be easier to simply call this function. 
1582 * 1583 * Note, like ring_buffer_lock_reserve, the length is the length of the data 1584 * and not the length of the event which would hold the header. 1585 */ 1586 int ring_buffer_write(struct ring_buffer *buffer, 1587 unsigned long length, 1588 void *data) 1589 { 1590 struct ring_buffer_per_cpu *cpu_buffer; 1591 struct ring_buffer_event *event; 1592 unsigned long event_length; 1593 void *body; 1594 int ret = -EBUSY; 1595 int cpu, resched; 1596 1597 if (ring_buffer_flags != RB_BUFFERS_ON) 1598 return -EBUSY; 1599 1600 if (atomic_read(&buffer->record_disabled)) 1601 return -EBUSY; 1602 1603 resched = ftrace_preempt_disable(); 1604 1605 cpu = raw_smp_processor_id(); 1606 1607 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 1608 goto out; 1609 1610 cpu_buffer = buffer->buffers[cpu]; 1611 1612 if (atomic_read(&cpu_buffer->record_disabled)) 1613 goto out; 1614 1615 event_length = rb_calculate_event_length(length); 1616 event = rb_reserve_next_event(cpu_buffer, 1617 RINGBUF_TYPE_DATA, event_length); 1618 if (!event) 1619 goto out; 1620 1621 body = rb_event_data(event); 1622 1623 memcpy(body, data, length); 1624 1625 rb_commit(cpu_buffer, event); 1626 1627 ret = 0; 1628 out: 1629 ftrace_preempt_enable(resched); 1630 1631 return ret; 1632 } 1633 EXPORT_SYMBOL_GPL(ring_buffer_write); 1634 1635 static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) 1636 { 1637 struct buffer_page *reader = cpu_buffer->reader_page; 1638 struct buffer_page *head = cpu_buffer->head_page; 1639 struct buffer_page *commit = cpu_buffer->commit_page; 1640 1641 return reader->read == rb_page_commit(reader) && 1642 (commit == reader || 1643 (commit == head && 1644 head->read == rb_page_commit(commit))); 1645 } 1646 1647 /** 1648 * ring_buffer_record_disable - stop all writes into the buffer 1649 * @buffer: The ring buffer to stop writes to. 1650 * 1651 * This prevents all writes to the buffer. Any attempt to write 1652 * to the buffer after this will fail and return NULL. 
1653 * 1654 * The caller should call synchronize_sched() after this. 1655 */ 1656 void ring_buffer_record_disable(struct ring_buffer *buffer) 1657 { 1658 atomic_inc(&buffer->record_disabled); 1659 } 1660 EXPORT_SYMBOL_GPL(ring_buffer_record_disable); 1661 1662 /** 1663 * ring_buffer_record_enable - enable writes to the buffer 1664 * @buffer: The ring buffer to enable writes 1665 * 1666 * Note, multiple disables will need the same number of enables 1667 * to truely enable the writing (much like preempt_disable). 1668 */ 1669 void ring_buffer_record_enable(struct ring_buffer *buffer) 1670 { 1671 atomic_dec(&buffer->record_disabled); 1672 } 1673 EXPORT_SYMBOL_GPL(ring_buffer_record_enable); 1674 1675 /** 1676 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer 1677 * @buffer: The ring buffer to stop writes to. 1678 * @cpu: The CPU buffer to stop 1679 * 1680 * This prevents all writes to the buffer. Any attempt to write 1681 * to the buffer after this will fail and return NULL. 1682 * 1683 * The caller should call synchronize_sched() after this. 1684 */ 1685 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) 1686 { 1687 struct ring_buffer_per_cpu *cpu_buffer; 1688 1689 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 1690 return; 1691 1692 cpu_buffer = buffer->buffers[cpu]; 1693 atomic_inc(&cpu_buffer->record_disabled); 1694 } 1695 EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu); 1696 1697 /** 1698 * ring_buffer_record_enable_cpu - enable writes to the buffer 1699 * @buffer: The ring buffer to enable writes 1700 * @cpu: The CPU to enable. 1701 * 1702 * Note, multiple disables will need the same number of enables 1703 * to truely enable the writing (much like preempt_disable). 
1704 */ 1705 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) 1706 { 1707 struct ring_buffer_per_cpu *cpu_buffer; 1708 1709 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 1710 return; 1711 1712 cpu_buffer = buffer->buffers[cpu]; 1713 atomic_dec(&cpu_buffer->record_disabled); 1714 } 1715 EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); 1716 1717 /** 1718 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer 1719 * @buffer: The ring buffer 1720 * @cpu: The per CPU buffer to get the entries from. 1721 */ 1722 unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) 1723 { 1724 struct ring_buffer_per_cpu *cpu_buffer; 1725 unsigned long ret; 1726 1727 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 1728 return 0; 1729 1730 cpu_buffer = buffer->buffers[cpu]; 1731 ret = cpu_buffer->entries; 1732 1733 return ret; 1734 } 1735 EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); 1736 1737 /** 1738 * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer 1739 * @buffer: The ring buffer 1740 * @cpu: The per CPU buffer to get the number of overruns from 1741 */ 1742 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) 1743 { 1744 struct ring_buffer_per_cpu *cpu_buffer; 1745 unsigned long ret; 1746 1747 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 1748 return 0; 1749 1750 cpu_buffer = buffer->buffers[cpu]; 1751 ret = cpu_buffer->overrun; 1752 1753 return ret; 1754 } 1755 EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); 1756 1757 /** 1758 * ring_buffer_entries - get the number of entries in a buffer 1759 * @buffer: The ring buffer 1760 * 1761 * Returns the total number of entries in the ring buffer 1762 * (all CPU entries) 1763 */ 1764 unsigned long ring_buffer_entries(struct ring_buffer *buffer) 1765 { 1766 struct ring_buffer_per_cpu *cpu_buffer; 1767 unsigned long entries = 0; 1768 int cpu; 1769 1770 /* if you care about this being correct, lock the buffer */ 1771 for_each_buffer_cpu(buffer, cpu) { 1772 
cpu_buffer = buffer->buffers[cpu]; 1773 entries += cpu_buffer->entries; 1774 } 1775 1776 return entries; 1777 } 1778 EXPORT_SYMBOL_GPL(ring_buffer_entries); 1779 1780 /** 1781 * ring_buffer_overrun_cpu - get the number of overruns in buffer 1782 * @buffer: The ring buffer 1783 * 1784 * Returns the total number of overruns in the ring buffer 1785 * (all CPU entries) 1786 */ 1787 unsigned long ring_buffer_overruns(struct ring_buffer *buffer) 1788 { 1789 struct ring_buffer_per_cpu *cpu_buffer; 1790 unsigned long overruns = 0; 1791 int cpu; 1792 1793 /* if you care about this being correct, lock the buffer */ 1794 for_each_buffer_cpu(buffer, cpu) { 1795 cpu_buffer = buffer->buffers[cpu]; 1796 overruns += cpu_buffer->overrun; 1797 } 1798 1799 return overruns; 1800 } 1801 EXPORT_SYMBOL_GPL(ring_buffer_overruns); 1802 1803 static void rb_iter_reset(struct ring_buffer_iter *iter) 1804 { 1805 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1806 1807 /* Iterator usage is expected to have record disabled */ 1808 if (list_empty(&cpu_buffer->reader_page->list)) { 1809 iter->head_page = cpu_buffer->head_page; 1810 iter->head = cpu_buffer->head_page->read; 1811 } else { 1812 iter->head_page = cpu_buffer->reader_page; 1813 iter->head = cpu_buffer->reader_page->read; 1814 } 1815 if (iter->head) 1816 iter->read_stamp = cpu_buffer->read_stamp; 1817 else 1818 iter->read_stamp = iter->head_page->page->time_stamp; 1819 } 1820 1821 /** 1822 * ring_buffer_iter_reset - reset an iterator 1823 * @iter: The iterator to reset 1824 * 1825 * Resets the iterator, so that it will start from the beginning 1826 * again. 
*/
void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	unsigned long flags;

	/* a NULL iterator is tolerated and ignored */
	if (!iter)
		return;

	cpu_buffer = iter->cpu_buffer;

	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
	rb_iter_reset(iter);
	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);

/**
 * ring_buffer_iter_empty - check if an iterator has no more to read
 * @iter: The iterator to check
 *
 * Returns non-zero when the iterator sits exactly at the commit position
 * of its cpu buffer.
 */
int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
{
	struct ring_buffer_per_cpu *cpu_buffer;

	cpu_buffer = iter->cpu_buffer;

	return iter->head_page == cpu_buffer->commit_page &&
		iter->head == rb_commit_index(cpu_buffer);
}
EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);

/*
 * Advance the consuming reader's time stamp by the delta carried in
 * @event. Padding and (unimplemented) time stamp events leave it alone.
 */
static void
rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
		     struct ring_buffer_event *event)
{
	u64 delta;

	switch (event->type) {
	case RINGBUF_TYPE_PADDING:
		return;

	case RINGBUF_TYPE_TIME_EXTEND:
		/* array[0] holds the upper bits, time_delta the lower ones */
		delta = event->array[0];
		delta <<= TS_SHIFT;
		delta += event->time_delta;
		cpu_buffer->read_stamp += delta;
		return;

	case RINGBUF_TYPE_TIME_STAMP:
		/* FIXME: not implemented */
		return;

	case RINGBUF_TYPE_DATA:
		cpu_buffer->read_stamp += event->time_delta;
		return;

	default:
		BUG();
	}
	return;
}

/*
 * Same as rb_update_read_stamp(), but for the private time stamp of a
 * non consuming iterator.
 */
static void
rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
			  struct ring_buffer_event *event)
{
	u64 delta;

	switch (event->type) {
	case RINGBUF_TYPE_PADDING:
		return;

	case RINGBUF_TYPE_TIME_EXTEND:
		/* array[0] holds the upper bits, time_delta the lower ones */
		delta = event->array[0];
		delta <<= TS_SHIFT;
		delta += event->time_delta;
		iter->read_stamp += delta;
		return;

	case RINGBUF_TYPE_TIME_STAMP:
		/* FIXME: not implemented */
		return;

	case RINGBUF_TYPE_DATA:
		iter->read_stamp += event->time_delta;
		return;

	default:
		BUG();
	}
	return;
}

/*
 * Return the page the consuming reader should read from, or NULL when
 * there is nothing left to read. When the current reader page has been
 * read completely it is swapped with the head page of the ring, under
 * cpu_buffer->lock with interrupts disabled.
 */
static struct buffer_page *
rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct buffer_page *reader = NULL;
	unsigned long flags;
	int nr_loops = 0;

	local_irq_save(flags);
	__raw_spin_lock(&cpu_buffer->lock);

 again:
	/*
	 * This should normally only loop twice. But because the
	 * start of the reader inserts an empty page, it causes
	 * a case where we will loop three times. There should be no
	 * reason to loop four times (that I know of).
	 */
	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
		reader = NULL;
		goto out;
	}

	reader = cpu_buffer->reader_page;

	/* If there's more to read, return this page */
	if (cpu_buffer->reader_page->read < rb_page_size(reader))
		goto out;

	/* Never should we have an index greater than the size */
	if (RB_WARN_ON(cpu_buffer,
		       cpu_buffer->reader_page->read > rb_page_size(reader)))
		goto out;

	/* check if we caught up to the tail */
	reader = NULL;
	if (cpu_buffer->commit_page == cpu_buffer->reader_page)
		goto out;

	/*
	 * Splice the empty reader page into the list around the head.
	 * Reset the reader page to size zero.
	 */

	reader = cpu_buffer->head_page;
	cpu_buffer->reader_page->list.next = reader->list.next;
	cpu_buffer->reader_page->list.prev = reader->list.prev;

	local_set(&cpu_buffer->reader_page->write, 0);
	local_set(&cpu_buffer->reader_page->page->commit, 0);

	/* Make the reader page now replace the head */
	reader->list.prev->next = &cpu_buffer->reader_page->list;
	reader->list.next->prev = &cpu_buffer->reader_page->list;

	/*
	 * If the tail is on the reader, then we must set the head
	 * to the inserted page, otherwise we set it one before.
	 */
	cpu_buffer->head_page = cpu_buffer->reader_page;

	if (cpu_buffer->commit_page != reader)
		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);

	/* Finally update the reader page to the new head */
	cpu_buffer->reader_page = reader;
	rb_reset_reader_page(cpu_buffer);

	/* re-evaluate with the freshly swapped-in reader page */
	goto again;

 out:
	__raw_spin_unlock(&cpu_buffer->lock);
	local_irq_restore(flags);

	return reader;
}

/*
 * Consume the event at the reader position: fix up the entry count and
 * read stamp, then step the reader page's read index over the event.
 */
static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct ring_buffer_event *event;
	struct buffer_page *reader;
	unsigned length;

	reader = rb_get_reader_page(cpu_buffer);

	/* This function should not be called when buffer is empty */
	if (RB_WARN_ON(cpu_buffer, !reader))
		return;

	event = rb_reader_event(cpu_buffer);

	/* data events and discarded events were counted as entries */
	if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event))
		cpu_buffer->entries--;

	rb_update_read_stamp(cpu_buffer, event);

	length = rb_event_length(event);
	cpu_buffer->reader_page->read += length;
}

/*
 * Step a non consuming iterator over the event at its head, moving on to
 * the next page when the end of the current page has been reached.
 */
static void rb_advance_iter(struct ring_buffer_iter *iter)
{
	struct ring_buffer *buffer;
	struct ring_buffer_per_cpu *cpu_buffer;
	struct ring_buffer_event *event;
	unsigned length;

	cpu_buffer = iter->cpu_buffer;
	buffer = cpu_buffer->buffer;

	/*
	 * Check if we are at the end of the buffer.
	 */
	if (iter->head >= rb_page_size(iter->head_page)) {
		if (RB_WARN_ON(buffer,
			       iter->head_page == cpu_buffer->commit_page))
			return;
		rb_inc_iter(iter);
		return;
	}

	event = rb_iter_head_event(iter);

	length = rb_event_length(event);

	/*
	 * This should not be called to advance the header if we are
	 * at the tail of the buffer.
	 */
	if (RB_WARN_ON(cpu_buffer,
		       (iter->head_page == cpu_buffer->commit_page) &&
		       (iter->head + length > rb_commit_index(cpu_buffer))))
		return;

	rb_update_iter_read_stamp(iter, event);

	iter->head += length;

	/* check for end of page padding */
	if ((iter->head >= rb_page_size(iter->head_page)) &&
	    (iter->head_page != cpu_buffer->commit_page))
		rb_advance_iter(iter);
}

/*
 * Look at the next event for the consuming reader of @cpu.
 *
 * Time extend and time stamp events are consumed internally and skipped.
 * A data event is returned without being consumed, with its time stamp
 * stored in *@ts when @ts is not NULL. A padding event is returned AFTER
 * the reader has already been advanced past it, so that the caller can
 * drop its locks and try again. Returns NULL when the buffer is empty or
 * after more than 10 internal retries.
 */
static struct ring_buffer_event *
rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	struct ring_buffer_event *event;
	struct buffer_page *reader;
	int nr_loops = 0;

	cpu_buffer = buffer->buffers[cpu];

 again:
	/*
	 * We repeat when a timestamp is encountered. It is possible
	 * to get multiple timestamps from an interrupt entering just
	 * as one timestamp is about to be written. The max times
	 * that this can happen is the number of nested interrupts we
	 * can have.  Nesting 10 deep of interrupts is clearly
	 * an anomaly.
	 */
	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
		return NULL;

	reader = rb_get_reader_page(cpu_buffer);
	if (!reader)
		return NULL;

	event = rb_reader_event(cpu_buffer);

	switch (event->type) {
	case RINGBUF_TYPE_PADDING:
		if (rb_null_event(event))
			RB_WARN_ON(cpu_buffer, 1);
		/*
		 * Because the writer could be discarding every
		 * event it creates (which would probably be bad)
		 * if we were to go back to "again" then we may never
		 * catch up, and will trigger the warn on, or lock
		 * the box. Return the padding, and we will release
		 * the current locks, and try again.
		 */
		rb_advance_reader(cpu_buffer);
		return event;

	case RINGBUF_TYPE_TIME_EXTEND:
		/* Internal data, OK to advance */
		rb_advance_reader(cpu_buffer);
		goto again;

	case RINGBUF_TYPE_TIME_STAMP:
		/* FIXME: not implemented */
		rb_advance_reader(cpu_buffer);
		goto again;

	case RINGBUF_TYPE_DATA:
		if (ts) {
			*ts = cpu_buffer->read_stamp + event->time_delta;
			ring_buffer_normalize_time_stamp(buffer,
							 cpu_buffer->cpu, ts);
		}
		return event;

	default:
		BUG();
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(ring_buffer_peek);

/*
 * Look at the next event for a non consuming iterator.
 *
 * Null padding moves the iterator to the next page; time extend and time
 * stamp events are skipped. A data event is returned without advancing
 * the iterator, with its time stamp stored in *@ts when @ts is not NULL.
 * Non-null padding is returned AFTER the iterator has already been
 * advanced past it. Returns NULL when there is nothing to read or after
 * more than 10 internal retries.
 */
static struct ring_buffer_event *
rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
{
	struct ring_buffer *buffer;
	struct ring_buffer_per_cpu *cpu_buffer;
	struct ring_buffer_event *event;
	int nr_loops = 0;

	if (ring_buffer_iter_empty(iter))
		return NULL;

	cpu_buffer = iter->cpu_buffer;
	buffer = cpu_buffer->buffer;

 again:
	/*
	 * We repeat when a timestamp is encountered. It is possible
	 * to get multiple timestamps from an interrupt entering just
	 * as one timestamp is about to be written. The max times
	 * that this can happen is the number of nested interrupts we
	 * can have. Nesting 10 deep of interrupts is clearly
	 * an anomaly.
	 */
	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
		return NULL;

	if (rb_per_cpu_empty(cpu_buffer))
		return NULL;

	event = rb_iter_head_event(iter);

	switch (event->type) {
	case RINGBUF_TYPE_PADDING:
		if (rb_null_event(event)) {
			rb_inc_iter(iter);
			goto again;
		}
		rb_advance_iter(iter);
		return event;

	case RINGBUF_TYPE_TIME_EXTEND:
		/* Internal data, OK to advance */
		rb_advance_iter(iter);
		goto again;

	case RINGBUF_TYPE_TIME_STAMP:
		/* FIXME: not implemented */
		rb_advance_iter(iter);
		goto again;

	case RINGBUF_TYPE_DATA:
		if (ts) {
			*ts = iter->read_stamp + event->time_delta;
			ring_buffer_normalize_time_stamp(buffer,
							 cpu_buffer->cpu, ts);
		}
		return event;

	default:
		BUG();
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);

/**
 * ring_buffer_peek - peek at the next event to be read
 * @buffer: The ring buffer to read
 * @cpu: The cpu to peek at
 * @ts: The timestamp counter of this event.
 *
 * This will return the event that will be read next, but does
 * not consume the data.
2207 */ 2208 struct ring_buffer_event * 2209 ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) 2210 { 2211 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 2212 struct ring_buffer_event *event; 2213 unsigned long flags; 2214 2215 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2216 return NULL; 2217 2218 again: 2219 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2220 event = rb_buffer_peek(buffer, cpu, ts); 2221 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2222 2223 if (event && event->type == RINGBUF_TYPE_PADDING) { 2224 cpu_relax(); 2225 goto again; 2226 } 2227 2228 return event; 2229 } 2230 2231 /** 2232 * ring_buffer_iter_peek - peek at the next event to be read 2233 * @iter: The ring buffer iterator 2234 * @ts: The timestamp counter of this event. 2235 * 2236 * This will return the event that will be read next, but does 2237 * not increment the iterator. 2238 */ 2239 struct ring_buffer_event * 2240 ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) 2241 { 2242 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 2243 struct ring_buffer_event *event; 2244 unsigned long flags; 2245 2246 again: 2247 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2248 event = rb_iter_peek(iter, ts); 2249 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2250 2251 if (event && event->type == RINGBUF_TYPE_PADDING) { 2252 cpu_relax(); 2253 goto again; 2254 } 2255 2256 return event; 2257 } 2258 2259 /** 2260 * ring_buffer_consume - return an event and consume it 2261 * @buffer: The ring buffer to get the next event from 2262 * 2263 * Returns the next event in the ring buffer, and that event is consumed. 2264 * Meaning, that sequential reads will keep returning a different event, 2265 * and eventually empty the ring buffer if the producer is slower. 
2266 */ 2267 struct ring_buffer_event * 2268 ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 2269 { 2270 struct ring_buffer_per_cpu *cpu_buffer; 2271 struct ring_buffer_event *event = NULL; 2272 unsigned long flags; 2273 2274 again: 2275 /* might be called in atomic */ 2276 preempt_disable(); 2277 2278 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2279 goto out; 2280 2281 cpu_buffer = buffer->buffers[cpu]; 2282 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2283 2284 event = rb_buffer_peek(buffer, cpu, ts); 2285 if (!event) 2286 goto out_unlock; 2287 2288 rb_advance_reader(cpu_buffer); 2289 2290 out_unlock: 2291 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2292 2293 out: 2294 preempt_enable(); 2295 2296 if (event && event->type == RINGBUF_TYPE_PADDING) { 2297 cpu_relax(); 2298 goto again; 2299 } 2300 2301 return event; 2302 } 2303 EXPORT_SYMBOL_GPL(ring_buffer_consume); 2304 2305 /** 2306 * ring_buffer_read_start - start a non consuming read of the buffer 2307 * @buffer: The ring buffer to read from 2308 * @cpu: The cpu buffer to iterate over 2309 * 2310 * This starts up an iteration through the buffer. It also disables 2311 * the recording to the buffer until the reading is finished. 2312 * This prevents the reading from being corrupted. This is not 2313 * a consuming read, so a producer is not expected. 2314 * 2315 * Must be paired with ring_buffer_finish. 
2316 */ 2317 struct ring_buffer_iter * 2318 ring_buffer_read_start(struct ring_buffer *buffer, int cpu) 2319 { 2320 struct ring_buffer_per_cpu *cpu_buffer; 2321 struct ring_buffer_iter *iter; 2322 unsigned long flags; 2323 2324 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2325 return NULL; 2326 2327 iter = kmalloc(sizeof(*iter), GFP_KERNEL); 2328 if (!iter) 2329 return NULL; 2330 2331 cpu_buffer = buffer->buffers[cpu]; 2332 2333 iter->cpu_buffer = cpu_buffer; 2334 2335 atomic_inc(&cpu_buffer->record_disabled); 2336 synchronize_sched(); 2337 2338 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2339 __raw_spin_lock(&cpu_buffer->lock); 2340 rb_iter_reset(iter); 2341 __raw_spin_unlock(&cpu_buffer->lock); 2342 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2343 2344 return iter; 2345 } 2346 EXPORT_SYMBOL_GPL(ring_buffer_read_start); 2347 2348 /** 2349 * ring_buffer_finish - finish reading the iterator of the buffer 2350 * @iter: The iterator retrieved by ring_buffer_start 2351 * 2352 * This re-enables the recording to the buffer, and frees the 2353 * iterator. 2354 */ 2355 void 2356 ring_buffer_read_finish(struct ring_buffer_iter *iter) 2357 { 2358 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 2359 2360 atomic_dec(&cpu_buffer->record_disabled); 2361 kfree(iter); 2362 } 2363 EXPORT_SYMBOL_GPL(ring_buffer_read_finish); 2364 2365 /** 2366 * ring_buffer_read - read the next item in the ring buffer by the iterator 2367 * @iter: The ring buffer iterator 2368 * @ts: The time stamp of the event read. 2369 * 2370 * This reads the next event in the ring buffer and increments the iterator. 
2371 */ 2372 struct ring_buffer_event * 2373 ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) 2374 { 2375 struct ring_buffer_event *event; 2376 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 2377 unsigned long flags; 2378 2379 again: 2380 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2381 event = rb_iter_peek(iter, ts); 2382 if (!event) 2383 goto out; 2384 2385 rb_advance_iter(iter); 2386 out: 2387 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2388 2389 if (event && event->type == RINGBUF_TYPE_PADDING) { 2390 cpu_relax(); 2391 goto again; 2392 } 2393 2394 return event; 2395 } 2396 EXPORT_SYMBOL_GPL(ring_buffer_read); 2397 2398 /** 2399 * ring_buffer_size - return the size of the ring buffer (in bytes) 2400 * @buffer: The ring buffer. 2401 */ 2402 unsigned long ring_buffer_size(struct ring_buffer *buffer) 2403 { 2404 return BUF_PAGE_SIZE * buffer->pages; 2405 } 2406 EXPORT_SYMBOL_GPL(ring_buffer_size); 2407 2408 static void 2409 rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) 2410 { 2411 cpu_buffer->head_page 2412 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 2413 local_set(&cpu_buffer->head_page->write, 0); 2414 local_set(&cpu_buffer->head_page->page->commit, 0); 2415 2416 cpu_buffer->head_page->read = 0; 2417 2418 cpu_buffer->tail_page = cpu_buffer->head_page; 2419 cpu_buffer->commit_page = cpu_buffer->head_page; 2420 2421 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 2422 local_set(&cpu_buffer->reader_page->write, 0); 2423 local_set(&cpu_buffer->reader_page->page->commit, 0); 2424 cpu_buffer->reader_page->read = 0; 2425 2426 cpu_buffer->overrun = 0; 2427 cpu_buffer->entries = 0; 2428 2429 cpu_buffer->write_stamp = 0; 2430 cpu_buffer->read_stamp = 0; 2431 } 2432 2433 /** 2434 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer 2435 * @buffer: The ring buffer to reset a per cpu buffer of 2436 * @cpu: The CPU buffer to be reset 2437 */ 2438 void ring_buffer_reset_cpu(struct ring_buffer *buffer, 
int cpu) 2439 { 2440 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 2441 unsigned long flags; 2442 2443 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2444 return; 2445 2446 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2447 2448 __raw_spin_lock(&cpu_buffer->lock); 2449 2450 rb_reset_cpu(cpu_buffer); 2451 2452 __raw_spin_unlock(&cpu_buffer->lock); 2453 2454 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2455 } 2456 EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); 2457 2458 /** 2459 * ring_buffer_reset - reset a ring buffer 2460 * @buffer: The ring buffer to reset all cpu buffers 2461 */ 2462 void ring_buffer_reset(struct ring_buffer *buffer) 2463 { 2464 int cpu; 2465 2466 for_each_buffer_cpu(buffer, cpu) 2467 ring_buffer_reset_cpu(buffer, cpu); 2468 } 2469 EXPORT_SYMBOL_GPL(ring_buffer_reset); 2470 2471 /** 2472 * rind_buffer_empty - is the ring buffer empty? 2473 * @buffer: The ring buffer to test 2474 */ 2475 int ring_buffer_empty(struct ring_buffer *buffer) 2476 { 2477 struct ring_buffer_per_cpu *cpu_buffer; 2478 int cpu; 2479 2480 /* yes this is racy, but if you don't like the race, lock the buffer */ 2481 for_each_buffer_cpu(buffer, cpu) { 2482 cpu_buffer = buffer->buffers[cpu]; 2483 if (!rb_per_cpu_empty(cpu_buffer)) 2484 return 0; 2485 } 2486 2487 return 1; 2488 } 2489 EXPORT_SYMBOL_GPL(ring_buffer_empty); 2490 2491 /** 2492 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty? 
2493 * @buffer: The ring buffer 2494 * @cpu: The CPU buffer to test 2495 */ 2496 int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) 2497 { 2498 struct ring_buffer_per_cpu *cpu_buffer; 2499 int ret; 2500 2501 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2502 return 1; 2503 2504 cpu_buffer = buffer->buffers[cpu]; 2505 ret = rb_per_cpu_empty(cpu_buffer); 2506 2507 2508 return ret; 2509 } 2510 EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); 2511 2512 /** 2513 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers 2514 * @buffer_a: One buffer to swap with 2515 * @buffer_b: The other buffer to swap with 2516 * 2517 * This function is useful for tracers that want to take a "snapshot" 2518 * of a CPU buffer and has another back up buffer lying around. 2519 * it is expected that the tracer handles the cpu buffer not being 2520 * used at the moment. 2521 */ 2522 int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, 2523 struct ring_buffer *buffer_b, int cpu) 2524 { 2525 struct ring_buffer_per_cpu *cpu_buffer_a; 2526 struct ring_buffer_per_cpu *cpu_buffer_b; 2527 int ret = -EINVAL; 2528 2529 if (!cpumask_test_cpu(cpu, buffer_a->cpumask) || 2530 !cpumask_test_cpu(cpu, buffer_b->cpumask)) 2531 goto out; 2532 2533 /* At least make sure the two buffers are somewhat the same */ 2534 if (buffer_a->pages != buffer_b->pages) 2535 goto out; 2536 2537 ret = -EAGAIN; 2538 2539 if (ring_buffer_flags != RB_BUFFERS_ON) 2540 goto out; 2541 2542 if (atomic_read(&buffer_a->record_disabled)) 2543 goto out; 2544 2545 if (atomic_read(&buffer_b->record_disabled)) 2546 goto out; 2547 2548 cpu_buffer_a = buffer_a->buffers[cpu]; 2549 cpu_buffer_b = buffer_b->buffers[cpu]; 2550 2551 if (atomic_read(&cpu_buffer_a->record_disabled)) 2552 goto out; 2553 2554 if (atomic_read(&cpu_buffer_b->record_disabled)) 2555 goto out; 2556 2557 /* 2558 * We can't do a synchronize_sched here because this 2559 * function can be called in atomic context. 
2560 * Normally this will be called from the same CPU as cpu. 2561 * If not it's up to the caller to protect this. 2562 */ 2563 atomic_inc(&cpu_buffer_a->record_disabled); 2564 atomic_inc(&cpu_buffer_b->record_disabled); 2565 2566 buffer_a->buffers[cpu] = cpu_buffer_b; 2567 buffer_b->buffers[cpu] = cpu_buffer_a; 2568 2569 cpu_buffer_b->buffer = buffer_a; 2570 cpu_buffer_a->buffer = buffer_b; 2571 2572 atomic_dec(&cpu_buffer_a->record_disabled); 2573 atomic_dec(&cpu_buffer_b->record_disabled); 2574 2575 ret = 0; 2576 out: 2577 return ret; 2578 } 2579 EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); 2580 2581 static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, 2582 struct buffer_data_page *bpage, 2583 unsigned int offset) 2584 { 2585 struct ring_buffer_event *event; 2586 unsigned long head; 2587 2588 __raw_spin_lock(&cpu_buffer->lock); 2589 for (head = offset; head < local_read(&bpage->commit); 2590 head += rb_event_length(event)) { 2591 2592 event = __rb_data_page_index(bpage, head); 2593 if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) 2594 return; 2595 /* Only count data entries */ 2596 if (event->type != RINGBUF_TYPE_DATA) 2597 continue; 2598 cpu_buffer->entries--; 2599 } 2600 __raw_spin_unlock(&cpu_buffer->lock); 2601 } 2602 2603 /** 2604 * ring_buffer_alloc_read_page - allocate a page to read from buffer 2605 * @buffer: the buffer to allocate for. 2606 * 2607 * This function is used in conjunction with ring_buffer_read_page. 2608 * When reading a full page from the ring buffer, these functions 2609 * can be used to speed up the process. The calling function should 2610 * allocate a few pages first with this function. Then when it 2611 * needs to get pages from the ring buffer, it passes the result 2612 * of this function into ring_buffer_read_page, which will swap 2613 * the page that was allocated, with the read page of the buffer. 2614 * 2615 * Returns: 2616 * The page allocated, or NULL on error. 
2617 */ 2618 void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) 2619 { 2620 struct buffer_data_page *bpage; 2621 unsigned long addr; 2622 2623 addr = __get_free_page(GFP_KERNEL); 2624 if (!addr) 2625 return NULL; 2626 2627 bpage = (void *)addr; 2628 2629 rb_init_page(bpage); 2630 2631 return bpage; 2632 } 2633 2634 /** 2635 * ring_buffer_free_read_page - free an allocated read page 2636 * @buffer: the buffer the page was allocate for 2637 * @data: the page to free 2638 * 2639 * Free a page allocated from ring_buffer_alloc_read_page. 2640 */ 2641 void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) 2642 { 2643 free_page((unsigned long)data); 2644 } 2645 2646 /** 2647 * ring_buffer_read_page - extract a page from the ring buffer 2648 * @buffer: buffer to extract from 2649 * @data_page: the page to use allocated from ring_buffer_alloc_read_page 2650 * @len: amount to extract 2651 * @cpu: the cpu of the buffer to extract 2652 * @full: should the extraction only happen when the page is full. 2653 * 2654 * This function will pull out a page from the ring buffer and consume it. 2655 * @data_page must be the address of the variable that was returned 2656 * from ring_buffer_alloc_read_page. This is because the page might be used 2657 * to swap with a page in the ring buffer. 2658 * 2659 * for example: 2660 * rpage = ring_buffer_alloc_read_page(buffer); 2661 * if (!rpage) 2662 * return error; 2663 * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); 2664 * if (ret >= 0) 2665 * process_page(rpage, ret); 2666 * 2667 * When @full is set, the function will not return true unless 2668 * the writer is off the reader page. 2669 * 2670 * Note: it is up to the calling functions to handle sleeps and wakeups. 2671 * The ring buffer can be used anywhere in the kernel and can not 2672 * blindly call wake_up. The layer that uses the ring buffer must be 2673 * responsible for that. 
2674 * 2675 * Returns: 2676 * >=0 if data has been transferred, returns the offset of consumed data. 2677 * <0 if no data has been transferred. 2678 */ 2679 int ring_buffer_read_page(struct ring_buffer *buffer, 2680 void **data_page, size_t len, int cpu, int full) 2681 { 2682 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 2683 struct ring_buffer_event *event; 2684 struct buffer_data_page *bpage; 2685 struct buffer_page *reader; 2686 unsigned long flags; 2687 unsigned int commit; 2688 unsigned int read; 2689 u64 save_timestamp; 2690 int ret = -1; 2691 2692 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2693 goto out; 2694 2695 /* 2696 * If len is not big enough to hold the page header, then 2697 * we can not copy anything. 2698 */ 2699 if (len <= BUF_PAGE_HDR_SIZE) 2700 goto out; 2701 2702 len -= BUF_PAGE_HDR_SIZE; 2703 2704 if (!data_page) 2705 goto out; 2706 2707 bpage = *data_page; 2708 if (!bpage) 2709 goto out; 2710 2711 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2712 2713 reader = rb_get_reader_page(cpu_buffer); 2714 if (!reader) 2715 goto out_unlock; 2716 2717 event = rb_reader_event(cpu_buffer); 2718 2719 read = reader->read; 2720 commit = rb_page_commit(reader); 2721 2722 /* 2723 * If this page has been partially read or 2724 * if len is not big enough to read the rest of the page or 2725 * a writer is still on the page, then 2726 * we must copy the data from the page to the buffer. 2727 * Otherwise, we can simply swap the page with the one passed in. 
2728 */ 2729 if (read || (len < (commit - read)) || 2730 cpu_buffer->reader_page == cpu_buffer->commit_page) { 2731 struct buffer_data_page *rpage = cpu_buffer->reader_page->page; 2732 unsigned int rpos = read; 2733 unsigned int pos = 0; 2734 unsigned int size; 2735 2736 if (full) 2737 goto out_unlock; 2738 2739 if (len > (commit - read)) 2740 len = (commit - read); 2741 2742 size = rb_event_length(event); 2743 2744 if (len < size) 2745 goto out_unlock; 2746 2747 /* save the current timestamp, since the user will need it */ 2748 save_timestamp = cpu_buffer->read_stamp; 2749 2750 /* Need to copy one event at a time */ 2751 do { 2752 memcpy(bpage->data + pos, rpage->data + rpos, size); 2753 2754 len -= size; 2755 2756 rb_advance_reader(cpu_buffer); 2757 rpos = reader->read; 2758 pos += size; 2759 2760 event = rb_reader_event(cpu_buffer); 2761 size = rb_event_length(event); 2762 } while (len > size); 2763 2764 /* update bpage */ 2765 local_set(&bpage->commit, pos); 2766 bpage->time_stamp = save_timestamp; 2767 2768 /* we copied everything to the beginning */ 2769 read = 0; 2770 } else { 2771 /* swap the pages */ 2772 rb_init_page(bpage); 2773 bpage = reader->page; 2774 reader->page = *data_page; 2775 local_set(&reader->write, 0); 2776 reader->read = 0; 2777 *data_page = bpage; 2778 2779 /* update the entry counter */ 2780 rb_remove_entries(cpu_buffer, bpage, read); 2781 } 2782 ret = read; 2783 2784 out_unlock: 2785 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2786 2787 out: 2788 return ret; 2789 } 2790 2791 static ssize_t 2792 rb_simple_read(struct file *filp, char __user *ubuf, 2793 size_t cnt, loff_t *ppos) 2794 { 2795 unsigned long *p = filp->private_data; 2796 char buf[64]; 2797 int r; 2798 2799 if (test_bit(RB_BUFFERS_DISABLED_BIT, p)) 2800 r = sprintf(buf, "permanently disabled\n"); 2801 else 2802 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p)); 2803 2804 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2805 } 2806 2807 static 
ssize_t 2808 rb_simple_write(struct file *filp, const char __user *ubuf, 2809 size_t cnt, loff_t *ppos) 2810 { 2811 unsigned long *p = filp->private_data; 2812 char buf[64]; 2813 unsigned long val; 2814 int ret; 2815 2816 if (cnt >= sizeof(buf)) 2817 return -EINVAL; 2818 2819 if (copy_from_user(&buf, ubuf, cnt)) 2820 return -EFAULT; 2821 2822 buf[cnt] = 0; 2823 2824 ret = strict_strtoul(buf, 10, &val); 2825 if (ret < 0) 2826 return ret; 2827 2828 if (val) 2829 set_bit(RB_BUFFERS_ON_BIT, p); 2830 else 2831 clear_bit(RB_BUFFERS_ON_BIT, p); 2832 2833 (*ppos)++; 2834 2835 return cnt; 2836 } 2837 2838 static const struct file_operations rb_simple_fops = { 2839 .open = tracing_open_generic, 2840 .read = rb_simple_read, 2841 .write = rb_simple_write, 2842 }; 2843 2844 2845 static __init int rb_init_debugfs(void) 2846 { 2847 struct dentry *d_tracer; 2848 struct dentry *entry; 2849 2850 d_tracer = tracing_init_dentry(); 2851 2852 entry = debugfs_create_file("tracing_on", 0644, d_tracer, 2853 &ring_buffer_flags, &rb_simple_fops); 2854 if (!entry) 2855 pr_warning("Could not create debugfs 'tracing_on' entry\n"); 2856 2857 return 0; 2858 } 2859 2860 fs_initcall(rb_init_debugfs); 2861 2862 #ifdef CONFIG_HOTPLUG_CPU 2863 static int rb_cpu_notify(struct notifier_block *self, 2864 unsigned long action, void *hcpu) 2865 { 2866 struct ring_buffer *buffer = 2867 container_of(self, struct ring_buffer, cpu_notify); 2868 long cpu = (long)hcpu; 2869 2870 switch (action) { 2871 case CPU_UP_PREPARE: 2872 case CPU_UP_PREPARE_FROZEN: 2873 if (cpu_isset(cpu, *buffer->cpumask)) 2874 return NOTIFY_OK; 2875 2876 buffer->buffers[cpu] = 2877 rb_allocate_cpu_buffer(buffer, cpu); 2878 if (!buffer->buffers[cpu]) { 2879 WARN(1, "failed to allocate ring buffer on CPU %ld\n", 2880 cpu); 2881 return NOTIFY_OK; 2882 } 2883 smp_wmb(); 2884 cpu_set(cpu, *buffer->cpumask); 2885 break; 2886 case CPU_DOWN_PREPARE: 2887 case CPU_DOWN_PREPARE_FROZEN: 2888 /* 2889 * Do nothing. 
2890 * If we were to free the buffer, then the user would 2891 * lose any trace that was in the buffer. 2892 */ 2893 break; 2894 default: 2895 break; 2896 } 2897 return NOTIFY_OK; 2898 } 2899 #endif 2900