/*
 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/blktrace_api.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/debugfs.h>
#include <linux/time.h>
#include <trace/block.h>
#include <linux/uaccess.h>
#include "trace_output.h"

static unsigned int blktrace_seq __read_mostly = 1;

static struct trace_array *blk_tr;
static bool blk_tracer_enabled __read_mostly;

/* Select an alternative, minimalistic output format rather than the original one */
#define TRACE_BLK_OPT_CLASSIC	0x1

static struct tracer_opt blk_tracer_opts[] = {
	/* Default: disable the minimalistic output */
	{ TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) },
	{ }
};

static struct tracer_flags blk_tracer_flags = {
	.val  = 0,
	.opts = blk_tracer_opts,
};

/* Global reference count of probes */
static atomic_t blk_probes_ref = ATOMIC_INIT(0);

static void blk_register_tracepoints(void);
static void blk_unregister_tracepoints(void);

/*
 * Send out a notify message.
 */
static void trace_note(struct blk_trace *bt, pid_t pid, int action,
		       const void *data, size_t len)
{
	struct blk_io_trace *t;
	struct ring_buffer_event *event = NULL;
	int pc = 0;
	int cpu = smp_processor_id();
	bool blk_tracer = blk_tracer_enabled;

	if (blk_tracer) {
		pc = preempt_count();
		event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK,
						  sizeof(*t) + len,
						  0, pc);
		if (!event)
			return;
		t = ring_buffer_event_data(event);
		goto record_it;
	}

	if (!bt->rchan)
		return;

	t = relay_reserve(bt->rchan, sizeof(*t) + len);
	if (t) {
		t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
		t->time = ktime_to_ns(ktime_get());
record_it:
		t->device = bt->dev;
		t->action = action;
		t->pid = pid;
		t->cpu = cpu;
		t->pdu_len = len;
		memcpy((void *) t + sizeof(*t), data, len);

		if (blk_tracer)
			trace_buffer_unlock_commit(blk_tr, event, 0, pc);
	}
}

/*
 * Send out a notify for this process, if we haven't done so since a trace
 * started
 */
static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk)
{
	tsk->btrace_seq = blktrace_seq;
	trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm));
}

static void trace_note_time(struct blk_trace *bt)
{
	struct timespec now;
	unsigned long flags;
	u32 words[2];

	getnstimeofday(&now);
	words[0] = now.tv_sec;
	words[1] = now.tv_nsec;

	local_irq_save(flags);
	trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words));
	local_irq_restore(flags);
}

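/*
 * __trace_note_message - record a free-form message in the trace stream
 *
 * Formats the printf-style arguments into a per-cpu message buffer and
 * emits it as a BLK_TN_MESSAGE note, provided a trace is currently running.
 */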
void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
{
	int n;
	va_list args;
	unsigned long flags;
	char *buf;

	if (unlikely(bt->trace_state != Blktrace_running &&
		     !blk_tracer_enabled))
		return;

	local_irq_save(flags);
	buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
	va_start(args, fmt);
	n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
	va_end(args);

	trace_note(bt, 0, BLK_TN_MESSAGE, buf, n);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(__trace_note_message);

static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
			 pid_t pid)
{
	if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
		return 1;
	if (sector < bt->start_lba || sector > bt->end_lba)
		return 1;
	if (bt->pid && pid != bt->pid)
		return 1;

	return 0;
}

/*
 * Data direction bit lookup
 */
static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
				 BLK_TC_ACT(BLK_TC_WRITE) };

/* The ilog2() calls fall out because they're constant */
#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
	  (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name))

/*
 * The worker for the various blk_add_trace*() types. Fills out a
 * blk_io_trace structure and places it in a per-cpu subbuffer.
 */
static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
			    int rw, u32 what, int error, int pdu_len,
			    void *pdu_data)
{
	struct task_struct *tsk = current;
	struct ring_buffer_event *event = NULL;
	struct blk_io_trace *t;
	unsigned long flags = 0;
	unsigned long *sequence;
	pid_t pid;
	int cpu, pc = 0;
	bool blk_tracer = blk_tracer_enabled;

	if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
		return;

	what |= ddir_act[rw & WRITE];
	what |= MASK_TC_BIT(rw, BARRIER);
	what |= MASK_TC_BIT(rw, SYNCIO);
	what |= MASK_TC_BIT(rw, AHEAD);
	what |= MASK_TC_BIT(rw, META);
	what |= MASK_TC_BIT(rw, DISCARD);

	pid = tsk->pid;
	if (unlikely(act_log_check(bt, what, sector, pid)))
		return;
	cpu = raw_smp_processor_id();

	if (blk_tracer) {
		tracing_record_cmdline(current);

		pc = preempt_count();
		event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK,
						  sizeof(*t) + pdu_len,
						  0, pc);
		if (!event)
			return;
		t = ring_buffer_event_data(event);
		goto record_it;
	}

	/*
	 * A word about the locking here - we disable interrupts to reserve
	 * some space in the relay per-cpu buffer, to prevent an irq
	 * from coming in and stepping on our toes.
	 */
	local_irq_save(flags);

	if (unlikely(tsk->btrace_seq != blktrace_seq))
		trace_note_tsk(bt, tsk);

	t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
	if (t) {
		sequence = per_cpu_ptr(bt->sequence, cpu);

		t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
		t->sequence = ++(*sequence);
		t->time = ktime_to_ns(ktime_get());
record_it:
		/*
		 * These two are not needed in ftrace as they are in the
		 * generic trace_entry, filled by tracing_generic_entry_update,
		 * but for the trace_event->bin() synthesizer benefit we do it
		 * here too.
		 */
		t->cpu = cpu;
		t->pid = pid;

		t->sector = sector;
		t->bytes = bytes;
		t->action = what;
		t->device = bt->dev;
		t->error = error;
		t->pdu_len = pdu_len;

		if (pdu_len)
			memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);

		if (blk_tracer) {
			trace_buffer_unlock_commit(blk_tr, event, 0, pc);
			return;
		}
	}

	local_irq_restore(flags);
}

static struct dentry *blk_tree_root;
static DEFINE_MUTEX(blk_tree_mutex);

static void blk_trace_free(struct blk_trace *bt)
{
	debugfs_remove(bt->msg_file);
	debugfs_remove(bt->dropped_file);
	relay_close(bt->rchan);
	free_percpu(bt->sequence);
	free_percpu(bt->msg_data);
	kfree(bt);
}

static void blk_trace_cleanup(struct blk_trace *bt)
{
	blk_trace_free(bt);
	if (atomic_dec_and_test(&blk_probes_ref))
		blk_unregister_tracepoints();
}

int blk_trace_remove(struct request_queue *q)
{
	struct blk_trace *bt;

	bt = xchg(&q->blk_trace, NULL);
	if (!bt)
		return -EINVAL;

	if (bt->trace_state != Blktrace_running)
		blk_trace_cleanup(bt);

	return 0;
}
EXPORT_SYMBOL_GPL(blk_trace_remove);

static int blk_dropped_open(struct inode *inode, struct file *filp)
{
	filp->private_data = inode->i_private;

	return 0;
}

static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
				size_t count, loff_t *ppos)
{
	struct blk_trace *bt = filp->private_data;
	char buf[16];

	snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));

	return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
}

static const struct file_operations blk_dropped_fops = {
	.owner =	THIS_MODULE,
	.open =		blk_dropped_open,
	.read =		blk_dropped_read,
};

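/*
 * The debugfs "msg" file is write-only: user space can write a short string
 * here to have it inserted into the trace stream as a BLK_TN_MESSAGE note
 * via __trace_note_message().
 */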
static int blk_msg_open(struct inode *inode, struct file *filp)
{
	filp->private_data = inode->i_private;

	return 0;
}

static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
			     size_t count, loff_t *ppos)
{
	char *msg;
	struct blk_trace *bt;

	if (count >= BLK_TN_MAX_MSG)
		return -EINVAL;

	msg = kmalloc(count + 1, GFP_KERNEL);
	if (msg == NULL)
		return -ENOMEM;

	if (copy_from_user(msg, buffer, count)) {
		kfree(msg);
		return -EFAULT;
	}

	msg[count] = '\0';
	bt = filp->private_data;
	__trace_note_message(bt, "%s", msg);
	kfree(msg);

	return count;
}

static const struct file_operations blk_msg_fops = {
	.owner =	THIS_MODULE,
	.open =		blk_msg_open,
	.write =	blk_msg_write,
};

/*
 * Keep track of how many times we encountered a full subbuffer, to aid
 * the user space app in telling how many lost events there were.
 */
static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
				     void *prev_subbuf, size_t prev_padding)
{
	struct blk_trace *bt;

	if (!relay_buf_full(buf))
		return 1;

	bt = buf->chan->private_data;
	atomic_inc(&bt->dropped);
	return 0;
}

static int blk_remove_buf_file_callback(struct dentry *dentry)
{
	struct dentry *parent = dentry->d_parent;
	debugfs_remove(dentry);

	/*
	 * This will fail for all but the last file, but that is ok: what we
	 * care about is the top-level buts->name directory going away when
	 * the last trace file is gone. Then we don't have to rmdir() it
	 * manually on trace stop, which nicely solves the issue of force
	 * killing running traces.
	 */

	debugfs_remove(parent);
	return 0;
}

static struct dentry *blk_create_buf_file_callback(const char *filename,
						   struct dentry *parent,
						   int mode,
						   struct rchan_buf *buf,
						   int *is_global)
{
	return debugfs_create_file(filename, mode, parent, buf,
				   &relay_file_operations);
}

static struct rchan_callbacks blk_relay_callbacks = {
	.subbuf_start		= blk_subbuf_start_callback,
	.create_buf_file	= blk_create_buf_file_callback,
	.remove_buf_file	= blk_remove_buf_file_callback,
};

/*
 * Setup everything required to start tracing
 */
int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
		       struct blk_user_trace_setup *buts)
{
	struct blk_trace *old_bt, *bt = NULL;
	struct dentry *dir = NULL;
	int ret, i;

	if (!buts->buf_size || !buts->buf_nr)
		return -EINVAL;

	strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
	buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';

	/*
	 * some device names have larger paths - convert the slashes
	 * to underscores for this to work as expected
	 */
	for (i = 0; i < strlen(buts->name); i++)
		if (buts->name[i] == '/')
			buts->name[i] = '_';

	bt = kzalloc(sizeof(*bt), GFP_KERNEL);
	if (!bt)
		return -ENOMEM;

	ret = -ENOMEM;
	bt->sequence = alloc_percpu(unsigned long);
	if (!bt->sequence)
		goto err;

	bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
	if (!bt->msg_data)
		goto err;

	ret = -ENOENT;

	mutex_lock(&blk_tree_mutex);
	if (!blk_tree_root) {
		blk_tree_root = debugfs_create_dir("block", NULL);
		if (!blk_tree_root) {
			mutex_unlock(&blk_tree_mutex);
			goto err;
		}
	}
	mutex_unlock(&blk_tree_mutex);

	dir = debugfs_create_dir(buts->name, blk_tree_root);

	if (!dir)
		goto err;

	bt->dir = dir;
	bt->dev = dev;
	atomic_set(&bt->dropped, 0);

	ret = -EIO;
	bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
					       &blk_dropped_fops);
	if (!bt->dropped_file)
		goto err;

	bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
	if (!bt->msg_file)
		goto err;

	bt->rchan = relay_open("trace", dir, buts->buf_size,
			       buts->buf_nr, &blk_relay_callbacks, bt);
	if (!bt->rchan)
		goto err;

	bt->act_mask = buts->act_mask;
	if (!bt->act_mask)
		bt->act_mask = (u16) -1;

	bt->start_lba = buts->start_lba;
	bt->end_lba = buts->end_lba;
	if (!bt->end_lba)
		bt->end_lba = -1ULL;

	bt->pid = buts->pid;
	bt->trace_state = Blktrace_setup;

	ret = -EBUSY;
	old_bt = xchg(&q->blk_trace, bt);
	if (old_bt) {
		(void) xchg(&q->blk_trace, old_bt);
		goto err;
	}

	if (atomic_inc_return(&blk_probes_ref) == 1)
		blk_register_tracepoints();

	return 0;
err:
	blk_trace_free(bt);
	return ret;
}

int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
		    char __user *arg)
{
	struct blk_user_trace_setup buts;
	int ret;

	ret = copy_from_user(&buts, arg, sizeof(buts));
	if (ret)
		return -EFAULT;

	ret = do_blk_trace_setup(q, name, dev, &buts);
	if (ret)
		return ret;

	if (copy_to_user(arg, &buts, sizeof(buts)))
		return -EFAULT;

	return 0;
}
EXPORT_SYMBOL_GPL(blk_trace_setup);

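/**
 * blk_trace_startstop: - start or stop an existing trace
 * @q:		the request queue the trace is attached to
 * @start:	non-zero to start the trace, zero to stop it
 *
 * Description:
 *     Starting is only valid from the setup or stopped state; stopping is
 *     only valid while the trace is running, and flushes the relay channel.
 *
 **/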
int blk_trace_startstop(struct request_queue *q, int start)
{
	int ret;
	struct blk_trace *bt = q->blk_trace;

	if (bt == NULL)
		return -EINVAL;

	/*
	 * For starting a trace, we can transition from a setup or stopped
	 * trace. For stopping a trace, the state must be running
	 */
	ret = -EINVAL;
	if (start) {
		if (bt->trace_state == Blktrace_setup ||
		    bt->trace_state == Blktrace_stopped) {
			blktrace_seq++;
			smp_mb();
			bt->trace_state = Blktrace_running;

			trace_note_time(bt);
			ret = 0;
		}
	} else {
		if (bt->trace_state == Blktrace_running) {
			bt->trace_state = Blktrace_stopped;
			relay_flush(bt->rchan);
			ret = 0;
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(blk_trace_startstop);

/**
 * blk_trace_ioctl: - handle the ioctls associated with tracing
 * @bdev:	the block device
 * @cmd:	the ioctl cmd
 * @arg:	the argument data, if any
 *
 **/
int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
{
	struct request_queue *q;
	int ret, start = 0;
	char b[BDEVNAME_SIZE];

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	mutex_lock(&bdev->bd_mutex);

	switch (cmd) {
	case BLKTRACESETUP:
		bdevname(bdev, b);
		ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
		break;
	case BLKTRACESTART:
		start = 1;
		/* fall through */
	case BLKTRACESTOP:
		ret = blk_trace_startstop(q, start);
		break;
	case BLKTRACETEARDOWN:
		ret = blk_trace_remove(q);
		break;
	default:
		ret = -ENOTTY;
		break;
	}

	mutex_unlock(&bdev->bd_mutex);
	return ret;
}

/**
 * blk_trace_shutdown: - stop and cleanup trace structures
 * @q:    the request queue associated with the device
 *
 **/
void blk_trace_shutdown(struct request_queue *q)
{
	if (q->blk_trace) {
		blk_trace_startstop(q, 0);
		blk_trace_remove(q);
	}
}

/*
 * blktrace probes
 */

/**
 * blk_add_trace_rq - Add a trace for a request oriented action
 * @q:		queue the io is for
 * @rq:		the source request
 * @what:	the action
 *
 * Description:
 *     Records an action against a request. Will log the bio offset + size.
 *
 **/
static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
			     u32 what)
{
	struct blk_trace *bt = q->blk_trace;
	int rw = rq->cmd_flags & 0x03;

	if (likely(!bt))
		return;

	if (blk_discard_rq(rq))
		rw |= (1 << BIO_RW_DISCARD);

	if (blk_pc_request(rq)) {
		what |= BLK_TC_ACT(BLK_TC_PC);
		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
				rq->cmd_len, rq->cmd);
	} else {
		what |= BLK_TC_ACT(BLK_TC_FS);
		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
				rw, what, rq->errors, 0, NULL);
	}
}

static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq)
{
	blk_add_trace_rq(q, rq, BLK_TA_ABORT);
}

static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq)
{
	blk_add_trace_rq(q, rq, BLK_TA_INSERT);
}

static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
{
	blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
}

static void blk_add_trace_rq_requeue(struct request_queue *q,
				     struct request *rq)
{
	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
}

static void blk_add_trace_rq_complete(struct request_queue *q,
				      struct request *rq)
{
	blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
}

/**
 * blk_add_trace_bio - Add a trace for a bio oriented action
 * @q:		queue the io is for
 * @bio:	the source bio
 * @what:	the action
 *
 * Description:
 *     Records an action against a bio. Will log the bio offset + size.
 *
 **/
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
			      u32 what)
{
	struct blk_trace *bt = q->blk_trace;

	if (likely(!bt))
		return;

	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
			!bio_flagged(bio, BIO_UPTODATE), 0, NULL);
}

static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio)
{
	blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
}

static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
{
	blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
}

static void blk_add_trace_bio_backmerge(struct request_queue *q,
					struct bio *bio)
{
	blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
}

static void blk_add_trace_bio_frontmerge(struct request_queue *q,
					 struct bio *bio)
{
	blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
}

static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
{
	blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
}

static void blk_add_trace_getrq(struct request_queue *q,
				struct bio *bio, int rw)
{
	if (bio)
		blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
	else {
		struct blk_trace *bt = q->blk_trace;

		if (bt)
			__blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
	}
}


static void blk_add_trace_sleeprq(struct request_queue *q,
				  struct bio *bio, int rw)
{
	if (bio)
		blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
	else {
		struct blk_trace *bt = q->blk_trace;

		if (bt)
			__blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ,
					0, 0, NULL);
	}
}

static void blk_add_trace_plug(struct request_queue *q)
{
	struct blk_trace *bt = q->blk_trace;

	if (bt)
		__blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
}

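/*
 * The unplug probes below record how many requests are currently on the
 * queue (read + write counts) as a big-endian 64-bit PDU payload.
 */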
static void blk_add_trace_unplug_io(struct request_queue *q)
{
	struct blk_trace *bt = q->blk_trace;

	if (bt) {
		unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
		__be64 rpdu = cpu_to_be64(pdu);

		__blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
				sizeof(rpdu), &rpdu);
	}
}

static void blk_add_trace_unplug_timer(struct request_queue *q)
{
	struct blk_trace *bt = q->blk_trace;

	if (bt) {
		unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
		__be64 rpdu = cpu_to_be64(pdu);

		__blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
				sizeof(rpdu), &rpdu);
	}
}

static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
				unsigned int pdu)
{
	struct blk_trace *bt = q->blk_trace;

	if (bt) {
		__be64 rpdu = cpu_to_be64(pdu);

		__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
				BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
				sizeof(rpdu), &rpdu);
	}
}

/**
 * blk_add_trace_remap - Add a trace for a remap operation
 * @q:		queue the io is for
 * @bio:	the source bio
 * @dev:	target device
 * @from:	source sector
 * @to:		target sector
 *
 * Description:
 *     Device mapper or raid targets sometimes need to split a bio because
 *     it spans a stripe (or similar). Add a trace for that action.
 *
 **/
static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
				dev_t dev, sector_t from, sector_t to)
{
	struct blk_trace *bt = q->blk_trace;
	struct blk_io_trace_remap r;

	if (likely(!bt))
		return;

	r.device = cpu_to_be32(dev);
	r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
	r.sector = cpu_to_be64(to);

	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
			!bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
}

/**
 * blk_add_driver_data - Add binary message with driver-specific data
 * @q:		queue the io is for
 * @rq:		io request
 * @data:	driver-specific data
 * @len:	length of driver-specific data
 *
 * Description:
 *     Some drivers might want to write driver-specific data per request.
 *
 **/
void blk_add_driver_data(struct request_queue *q,
			 struct request *rq,
			 void *data, size_t len)
{
	struct blk_trace *bt = q->blk_trace;

	if (likely(!bt))
		return;

	if (blk_pc_request(rq))
		__blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
				rq->errors, len, data);
	else
		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
				0, BLK_TA_DRV_DATA, rq->errors, len, data);
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);

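/*
 * Attach the blk_add_trace_* probes to the block layer tracepoints. This is
 * done once, when the first trace is set up, and undone when the global
 * probe reference count drops back to zero.
 */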
static void blk_register_tracepoints(void)
{
	int ret;

	ret = register_trace_block_rq_abort(blk_add_trace_rq_abort);
	WARN_ON(ret);
	ret = register_trace_block_rq_insert(blk_add_trace_rq_insert);
	WARN_ON(ret);
	ret = register_trace_block_rq_issue(blk_add_trace_rq_issue);
	WARN_ON(ret);
	ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue);
	WARN_ON(ret);
	ret = register_trace_block_rq_complete(blk_add_trace_rq_complete);
	WARN_ON(ret);
	ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce);
	WARN_ON(ret);
	ret = register_trace_block_bio_complete(blk_add_trace_bio_complete);
	WARN_ON(ret);
	ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
	WARN_ON(ret);
	ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
	WARN_ON(ret);
	ret = register_trace_block_bio_queue(blk_add_trace_bio_queue);
	WARN_ON(ret);
	ret = register_trace_block_getrq(blk_add_trace_getrq);
	WARN_ON(ret);
	ret = register_trace_block_sleeprq(blk_add_trace_sleeprq);
	WARN_ON(ret);
	ret = register_trace_block_plug(blk_add_trace_plug);
	WARN_ON(ret);
	ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer);
	WARN_ON(ret);
	ret = register_trace_block_unplug_io(blk_add_trace_unplug_io);
	WARN_ON(ret);
	ret = register_trace_block_split(blk_add_trace_split);
	WARN_ON(ret);
	ret = register_trace_block_remap(blk_add_trace_remap);
	WARN_ON(ret);
}

static void blk_unregister_tracepoints(void)
{
	unregister_trace_block_remap(blk_add_trace_remap);
	unregister_trace_block_split(blk_add_trace_split);
	unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
	unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
	unregister_trace_block_plug(blk_add_trace_plug);
	unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
	unregister_trace_block_getrq(blk_add_trace_getrq);
	unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
	unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
	unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
	unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
	unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
	unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
	unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
	unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
	unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
	unregister_trace_block_rq_abort(blk_add_trace_rq_abort);

	tracepoint_synchronize_unregister();
}

/*
 * struct blk_io_tracer formatting routines
 */

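/*
 * fill_rwbs() builds the short "RWBS" flag string shown in the output: one
 * of D/W/R/N for the data direction, optionally followed by A (readahead),
 * B (barrier), S (sync) and M (meta).
 */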
static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
{
	int i = 0;
	int tc = t->action >> BLK_TC_SHIFT;

	if (t->action == BLK_TN_MESSAGE) {
		rwbs[i++] = 'N';
		goto out;
	}

	if (tc & BLK_TC_DISCARD)
		rwbs[i++] = 'D';
	else if (tc & BLK_TC_WRITE)
		rwbs[i++] = 'W';
	else if (t->bytes)
		rwbs[i++] = 'R';
	else
		rwbs[i++] = 'N';

	if (tc & BLK_TC_AHEAD)
		rwbs[i++] = 'A';
	if (tc & BLK_TC_BARRIER)
		rwbs[i++] = 'B';
	if (tc & BLK_TC_SYNC)
		rwbs[i++] = 'S';
	if (tc & BLK_TC_META)
		rwbs[i++] = 'M';
out:
	rwbs[i] = '\0';
}

static inline
const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent)
{
	return (const struct blk_io_trace *)ent;
}

static inline const void *pdu_start(const struct trace_entry *ent)
{
	return te_blk_io_trace(ent) + 1;
}

static inline u32 t_sec(const struct trace_entry *ent)
{
	return te_blk_io_trace(ent)->bytes >> 9;
}

static inline unsigned long long t_sector(const struct trace_entry *ent)
{
	return te_blk_io_trace(ent)->sector;
}

static inline __u16 t_error(const struct trace_entry *ent)
{
	return te_blk_io_trace(ent)->error;
}

static __u64 get_pdu_int(const struct trace_entry *ent)
{
	const __u64 *val = pdu_start(ent);
	return be64_to_cpu(*val);
}

static void get_pdu_remap(const struct trace_entry *ent,
			  struct blk_io_trace_remap *r)
{
	const struct blk_io_trace_remap *__r = pdu_start(ent);
	__u64 sector = __r->sector;

	r->device = be32_to_cpu(__r->device);
	r->device_from = be32_to_cpu(__r->device_from);
	r->sector = be64_to_cpu(sector);
}

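/*
 * Two styles of per-event prefix: the classic one reproduces the blktrace
 * layout (dev, cpu, timestamp, pid), while the default one prints only the
 * device and action and leaves the context columns to the ftrace core.
 */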
typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);

static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
{
	char rwbs[6];
	unsigned long long ts  = iter->ts;
	unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
	unsigned secs	       = (unsigned long)ts;
	const struct blk_io_trace *t = te_blk_io_trace(iter->ent);

	fill_rwbs(rwbs, t);

	return trace_seq_printf(&iter->seq,
				"%3d,%-3d %2d %5d.%09lu %5u %2s %3s ",
				MAJOR(t->device), MINOR(t->device), iter->cpu,
				secs, nsec_rem, iter->ent->pid, act, rwbs);
}

static int blk_log_action(struct trace_iterator *iter, const char *act)
{
	char rwbs[6];
	const struct blk_io_trace *t = te_blk_io_trace(iter->ent);

	fill_rwbs(rwbs, t);
	return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ",
				MAJOR(t->device), MINOR(t->device), act, rwbs);
}

static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
{
	char cmd[TASK_COMM_LEN];

	trace_find_cmdline(ent->pid, cmd);

	if (t_sec(ent))
		return trace_seq_printf(s, "%llu + %u [%s]\n",
					t_sector(ent), t_sec(ent), cmd);
	return trace_seq_printf(s, "[%s]\n", cmd);
}

static int blk_log_with_error(struct trace_seq *s,
			      const struct trace_entry *ent)
{
	if (t_sec(ent))
		return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent),
					t_sec(ent), t_error(ent));
	return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent));
}

static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
{
	struct blk_io_trace_remap r = { .device = 0, };

	get_pdu_remap(ent, &r);
	return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
				t_sector(ent),
				t_sec(ent), MAJOR(r.device), MINOR(r.device),
				(unsigned long long)r.sector);
}

static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
{
	char cmd[TASK_COMM_LEN];

	trace_find_cmdline(ent->pid, cmd);

	return trace_seq_printf(s, "[%s]\n", cmd);
}

static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent)
{
	char cmd[TASK_COMM_LEN];

	trace_find_cmdline(ent->pid, cmd);

	return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent));
}

static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent)
{
	char cmd[TASK_COMM_LEN];

	trace_find_cmdline(ent->pid, cmd);

	return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent),
				get_pdu_int(ent), cmd);
}

static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent)
{
	int ret;
	const struct blk_io_trace *t = te_blk_io_trace(ent);

	ret = trace_seq_putmem(s, t + 1, t->pdu_len);
	if (ret)
		return trace_seq_putc(s, '\n');
	return ret;
}

/*
 * struct tracer operations
 */

static void blk_tracer_print_header(struct seq_file *m)
{
	if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
		return;
	seq_puts(m, "# DEV   CPU TIMESTAMP     PID ACT FLG\n"
		    "#  |     |     |           |   |   |\n");
}

static void blk_tracer_start(struct trace_array *tr)
{
	blk_tracer_enabled = true;
	trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
}

static int blk_tracer_init(struct trace_array *tr)
{
	blk_tr = tr;
	blk_tracer_start(tr);
	return 0;
}

static void blk_tracer_stop(struct trace_array *tr)
{
	blk_tracer_enabled = false;
	trace_flags |= TRACE_ITER_CONTEXT_INFO;
}

static void blk_tracer_reset(struct trace_array *tr)
{
	blk_tracer_stop(tr);
}

static const struct {
	const char *act[2];
	int	   (*print)(struct trace_seq *s, const struct trace_entry *ent);
} what2act[] = {
	[__BLK_TA_QUEUE]	= {{  "Q", "queue" },	   blk_log_generic },
	[__BLK_TA_BACKMERGE]	= {{  "M", "backmerge" },  blk_log_generic },
	[__BLK_TA_FRONTMERGE]	= {{  "F", "frontmerge" }, blk_log_generic },
	[__BLK_TA_GETRQ]	= {{  "G", "getrq" },	   blk_log_generic },
	[__BLK_TA_SLEEPRQ]	= {{  "S", "sleeprq" },	   blk_log_generic },
	[__BLK_TA_REQUEUE]	= {{  "R", "requeue" },	   blk_log_with_error },
	[__BLK_TA_ISSUE]	= {{  "D", "issue" },	   blk_log_generic },
	[__BLK_TA_COMPLETE]	= {{  "C", "complete" },   blk_log_with_error },
	[__BLK_TA_PLUG]		= {{  "P", "plug" },	   blk_log_plug },
	[__BLK_TA_UNPLUG_IO]	= {{  "U", "unplug_io" },  blk_log_unplug },
	[__BLK_TA_UNPLUG_TIMER]	= {{ "UT", "unplug_timer" }, blk_log_unplug },
	[__BLK_TA_INSERT]	= {{  "I", "insert" },	   blk_log_generic },
	[__BLK_TA_SPLIT]	= {{  "X", "split" },	   blk_log_split },
	[__BLK_TA_BOUNCE]	= {{  "B", "bounce" },	   blk_log_generic },
	[__BLK_TA_REMAP]	= {{  "A", "remap" },	   blk_log_remap },
};

static enum print_line_t print_one_line(struct trace_iterator *iter,
					bool classic)
{
	struct trace_seq *s = &iter->seq;
	const struct blk_io_trace *t;
	u16 what;
	int ret;
	bool long_act;
	blk_log_action_t *log_action;

	t	   = te_blk_io_trace(iter->ent);
	what	   = t->action & ((1 << BLK_TC_SHIFT) - 1);
	long_act   = !!(trace_flags & TRACE_ITER_VERBOSE);
	log_action = classic ? &blk_log_action_classic : &blk_log_action;

	if (t->action == BLK_TN_MESSAGE) {
		ret = log_action(iter, long_act ? "message" : "m");
		if (ret)
			ret = blk_log_msg(s, iter->ent);
		goto out;
	}

	if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
		ret = trace_seq_printf(s, "Bad pc action %x\n", what);
	else {
		ret = log_action(iter, what2act[what].act[long_act]);
		if (ret)
			ret = what2act[what].print(s, iter->ent);
	}
out:
	return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
}

static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
					       int flags)
{
	if (!trace_print_context(iter))
		return TRACE_TYPE_PARTIAL_LINE;

	return print_one_line(iter, false);
}

static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
	struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
	const int offset = offsetof(struct blk_io_trace, sector);
	struct blk_io_trace old = {
		.magic	  = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
		.time     = iter->ts,
	};

	if (!trace_seq_putmem(s, &old, offset))
		return 0;
	return trace_seq_putmem(s, &t->sector,
				sizeof(old) - offset + t->pdu_len);
}

static enum print_line_t
blk_trace_event_print_binary(struct trace_iterator *iter, int flags)
{
	return blk_trace_synthesize_old_trace(iter) ?
			TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
}

static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
{
	if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
		return TRACE_TYPE_UNHANDLED;

	return print_one_line(iter, true);
}

static struct tracer blk_tracer __read_mostly = {
	.name		= "blk",
	.init		= blk_tracer_init,
	.reset		= blk_tracer_reset,
	.start		= blk_tracer_start,
	.stop		= blk_tracer_stop,
	.print_header	= blk_tracer_print_header,
	.print_line	= blk_tracer_print_line,
	.flags		= &blk_tracer_flags,
};

static struct trace_event trace_blk_event = {
	.type		= TRACE_BLK,
	.trace		= blk_trace_event_print,
	.binary		= blk_trace_event_print_binary,
};

static int __init init_blk_tracer(void)
{
	if (!register_ftrace_event(&trace_blk_event)) {
		pr_warning("Warning: could not register block events\n");
		return 1;
	}

	if (register_tracer(&blk_tracer) != 0) {
		pr_warning("Warning: could not register the block tracer\n");
		unregister_ftrace_event(&trace_blk_event);
		return 1;
	}

	return 0;
}

device_initcall(init_blk_tracer);

static int blk_trace_remove_queue(struct request_queue *q)
{
	struct blk_trace *bt;

	bt = xchg(&q->blk_trace, NULL);
	if (bt == NULL)
		return -EINVAL;

	if (atomic_dec_and_test(&blk_probes_ref))
		blk_unregister_tracepoints();

	blk_trace_free(bt);
	return 0;
}

/*
 * Setup everything required to start tracing
 */
static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
{
	struct blk_trace *old_bt, *bt = NULL;
	int ret = -ENOMEM;

	bt = kzalloc(sizeof(*bt), GFP_KERNEL);
	if (!bt)
		return -ENOMEM;

	bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
	if (!bt->msg_data)
		goto free_bt;

	bt->dev = dev;
	bt->act_mask = (u16)-1;
	bt->end_lba = -1ULL;

	old_bt = xchg(&q->blk_trace, bt);
	if (old_bt != NULL) {
		(void)xchg(&q->blk_trace, old_bt);
		ret = -EBUSY;
		goto free_bt;
	}

	if (atomic_inc_return(&blk_probes_ref) == 1)
		blk_register_tracepoints();
	return 0;

free_bt:
	blk_trace_free(bt);
	return ret;
}

/*
 * sysfs interface to enable and configure tracing
 */

static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
					 struct device_attribute *attr,
					 char *buf);
static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t count);
#define BLK_TRACE_DEVICE_ATTR(_name) \
	DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \
		    sysfs_blk_trace_attr_show, \
		    sysfs_blk_trace_attr_store)

static BLK_TRACE_DEVICE_ATTR(enable);
static BLK_TRACE_DEVICE_ATTR(act_mask);
static BLK_TRACE_DEVICE_ATTR(pid);
static BLK_TRACE_DEVICE_ATTR(start_lba);
static BLK_TRACE_DEVICE_ATTR(end_lba);

static struct attribute *blk_trace_attrs[] = {
	&dev_attr_enable.attr,
	&dev_attr_act_mask.attr,
	&dev_attr_pid.attr,
	&dev_attr_start_lba.attr,
	&dev_attr_end_lba.attr,
	NULL
};

struct attribute_group blk_trace_attr_group = {
	.name  = "trace",
	.attrs = blk_trace_attrs,
};

static const struct {
	int mask;
	const char *str;
} mask_maps[] = {
	{ BLK_TC_READ,		"read"		},
	{ BLK_TC_WRITE,		"write"		},
	{ BLK_TC_BARRIER,	"barrier"	},
	{ BLK_TC_SYNC,		"sync"		},
	{ BLK_TC_QUEUE,		"queue"		},
	{ BLK_TC_REQUEUE,	"requeue"	},
	{ BLK_TC_ISSUE,		"issue"		},
	{ BLK_TC_COMPLETE,	"complete"	},
	{ BLK_TC_FS,		"fs"		},
	{ BLK_TC_PC,		"pc"		},
	{ BLK_TC_AHEAD,		"ahead"		},
	{ BLK_TC_META,		"meta"		},
	{ BLK_TC_DISCARD,	"discard"	},
	{ BLK_TC_DRV_DATA,	"drv_data"	},
};

static int blk_trace_str2mask(const char *str)
{
	int i;
	int mask = 0;
	char *buf, *s, *token;

	buf = kstrdup(str, GFP_KERNEL);
	if (buf == NULL)
		return -ENOMEM;
	s = strstrip(buf);

	while (1) {
		token = strsep(&s, ",");
		if (token == NULL)
			break;

		if (*token == '\0')
			continue;

		for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
			if (strcasecmp(token, mask_maps[i].str) == 0) {
				mask |= mask_maps[i].mask;
				break;
			}
		}
		if (i == ARRAY_SIZE(mask_maps)) {
			mask = -EINVAL;
			break;
		}
	}
	kfree(buf);

	return mask;
}

static ssize_t blk_trace_mask2str(char *buf, int mask)
{
	int i;
	char *p = buf;

	for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
		if (mask & mask_maps[i].mask) {
			p += sprintf(p, "%s%s",
				    (p == buf) ? "" : ",", mask_maps[i].str);
		}
	}
	*p++ = '\n';

	return p - buf;
}

static struct request_queue *blk_trace_get_queue(struct block_device *bdev)
{
	if (bdev->bd_disk == NULL)
		return NULL;

	return bdev_get_queue(bdev);
}

static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
					 struct device_attribute *attr,
					 char *buf)
{
	struct hd_struct *p = dev_to_part(dev);
	struct request_queue *q;
	struct block_device *bdev;
	ssize_t ret = -ENXIO;

	lock_kernel();
	bdev = bdget(part_devt(p));
	if (bdev == NULL)
		goto out_unlock_kernel;

	q = blk_trace_get_queue(bdev);
	if (q == NULL)
		goto out_bdput;

	mutex_lock(&bdev->bd_mutex);

	if (attr == &dev_attr_enable) {
		ret = sprintf(buf, "%u\n", !!q->blk_trace);
		goto out_unlock_bdev;
	}

	if (q->blk_trace == NULL)
		ret = sprintf(buf, "disabled\n");
	else if (attr == &dev_attr_act_mask)
		ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
	else if (attr == &dev_attr_pid)
		ret = sprintf(buf, "%u\n", q->blk_trace->pid);
	else if (attr == &dev_attr_start_lba)
		ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
	else if (attr == &dev_attr_end_lba)
		ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);

out_unlock_bdev:
	mutex_unlock(&bdev->bd_mutex);
out_bdput:
	bdput(bdev);
out_unlock_kernel:
	unlock_kernel();
	return ret;
}

static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t count)
{
	struct block_device *bdev;
	struct request_queue *q;
	struct hd_struct *p;
	u64 value;
	ssize_t ret = -EINVAL;

	if (count == 0)
		goto out;

	if (attr == &dev_attr_act_mask) {
		if (sscanf(buf, "%llx", &value) != 1) {
			/* Assume it is a list of trace category names */
			ret = blk_trace_str2mask(buf);
			if (ret < 0)
				goto out;
			value = ret;
		}
	} else if (sscanf(buf, "%llu", &value) != 1)
		goto out;

	ret = -ENXIO;

	lock_kernel();
	p = dev_to_part(dev);
	bdev = bdget(part_devt(p));
	if (bdev == NULL)
		goto out_unlock_kernel;

	q = blk_trace_get_queue(bdev);
	if (q == NULL)
		goto out_bdput;

	mutex_lock(&bdev->bd_mutex);

	if (attr == &dev_attr_enable) {
		if (value)
			ret = blk_trace_setup_queue(q, bdev->bd_dev);
		else
			ret = blk_trace_remove_queue(q);
		goto out_unlock_bdev;
	}

	ret = 0;
	if (q->blk_trace == NULL)
		ret = blk_trace_setup_queue(q, bdev->bd_dev);

	if (ret == 0) {
		if (attr == &dev_attr_act_mask)
			q->blk_trace->act_mask = value;
		else if (attr == &dev_attr_pid)
			q->blk_trace->pid = value;
		else if (attr == &dev_attr_start_lba)
			q->blk_trace->start_lba = value;
		else if (attr == &dev_attr_end_lba)
			q->blk_trace->end_lba = value;
	}

out_unlock_bdev:
	mutex_unlock(&bdev->bd_mutex);
out_bdput:
	bdput(bdev);
out_unlock_kernel:
	unlock_kernel();
out:
	return ret ? ret : count;
}