Lines matching +full:entry +full:-latency in block/kyber-iosched.c (the Kyber I/O scheduler)

1 // SPDX-License-Identifier: GPL-2.0
3 * The Kyber I/O scheduler. Controls latency by throttling queue depths using
18 #include "blk-mq.h"
19 #include "blk-mq-debugfs.h"
20 #include "blk-mq-sched.h"
54 * Maximum device-wide depth for each scheduling domain.
68 * Default latency targets for each scheduling domain.
89 * to the target latency:
91 * <= 1/4 * target latency
92 * <= 1/2 * target latency
93 * <= 3/4 * target latency
94 * <= target latency
95 * <= 1 1/4 * target latency
96 * <= 1 1/2 * target latency
97 * <= 1 3/4 * target latency
98 * > 1 3/4 * target latency
102 * The width of the latency histogram buckets is
103 * 1 / (1 << KYBER_LATENCY_SHIFT) * target latency.
107 * The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency,
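
As a worked example of the bucketing above (a standalone userspace sketch, not kernel code): with KYBER_LATENCY_SHIFT = 2 there are eight buckets, each target/4 wide, and the mapping below mirrors the divisor/bucket arithmetic visible in add_latency_sample() further down this listing. The 2 ms target and the sample latencies are illustrative values only.

    #include <stdio.h>
    #include <stdint.h>

    #define KYBER_LATENCY_SHIFT   2
    #define KYBER_LATENCY_BUCKETS (2 << KYBER_LATENCY_SHIFT)   /* 8 buckets */

    /* Map a completion latency onto a histogram bucket, target/4 per bucket. */
    static unsigned int latency_bucket(uint64_t target_ns, uint64_t latency_ns)
    {
            uint64_t divisor = target_ns >> KYBER_LATENCY_SHIFT; /* bucket width */
            uint64_t bucket;

            if (divisor == 0)
                    divisor = 1;
            if (latency_ns == 0)
                    return 0;
            bucket = (latency_ns - 1) / divisor;
            return bucket < KYBER_LATENCY_BUCKETS - 1 ? bucket
                                                      : KYBER_LATENCY_BUCKETS - 1;
    }

    int main(void)
    {
            /* 2 ms target: 1.2 ms -> bucket 2 ("<= 3/4 * target"),
             * 5 ms -> bucket 7 ("> 1 3/4 * target"). */
            printf("%u %u\n", latency_bucket(2000000, 1200000),
                              latency_bucket(2000000, 5000000));
            return 0;
    }
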
116 * We measure both the total latency and the I/O latency (i.e., latency after
130 * Per-cpu latency histograms: total latency and I/O latency for each scheduling
139 * we use request->mq_ctx->index_hw to index the kcq in khd.
155 * Each scheduling domain has a limited number of in-flight requests
156 * device-wide, limited by these tokens.
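
The token pool behind this comment is one sbitmap_queue per scheduling domain. A condensed sketch of its life cycle, stitched together from the calls that appear in this listing; __sbitmap_queue_get() is the allocation side in the mainline file but is not among the matched lines, so treat that part as an assumption. This is an outline, not compilable on its own.

    /* setup (kyber_queue_data_alloc): one pool per domain, sized by kyber_depth[] */
    sbitmap_queue_init_node(&kqd->domain_tokens[i], kyber_depth[i], -1, false,
                            GFP_KERNEL, q->node);

    /* dispatch (kyber_get_domain_token): take a token, or get -1 and go wait */
    nr = __sbitmap_queue_get(&kqd->domain_tokens[sched_domain]);

    /* completion (rq_clear_domain_token): return the token, waking any waiter */
    sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr, rq->mq_ctx->cpu);

    /* timer (kyber_resize_domain): grow or shrink the pool from the latency stats */
    sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
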
161 * Async request percentage, converted to per-word depth for
214 unsigned int *buckets = kqd->latency_buckets[sched_domain][type]; in flush_latency_buckets()
215 atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type]; in flush_latency_buckets()
223 * Calculate the histogram bucket with the given percentile rank, or -1 if there
230 unsigned int *buckets = kqd->latency_buckets[sched_domain][type]; in calculate_percentile()
237 return -1; in calculate_percentile()
243 if (!kqd->latency_timeout[sched_domain]) in calculate_percentile()
244 kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL); in calculate_percentile()
246 time_is_after_jiffies(kqd->latency_timeout[sched_domain])) { in calculate_percentile()
247 return -1; in calculate_percentile()
249 kqd->latency_timeout[sched_domain] = 0; in calculate_percentile()
252 for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) { in calculate_percentile()
255 percentile_samples -= buckets[bucket]; in calculate_percentile()
257 memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type])); in calculate_percentile()
259 trace_kyber_latency(kqd->dev, kyber_domain_names[sched_domain], in calculate_percentile()
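
A self-contained sketch of the percentile walk whose fragments appear above: sum the bucket counts, take ceil(samples * percentile / 100) as the kernel's DIV_ROUND_UP does, and return the first bucket by which that many samples have accumulated. The bucket counts in the closing comment are made up for illustration.

    /* Returns the bucket index holding the Nth-percentile sample, or -1 if the
     * histogram is empty. Mirrors the loop in calculate_percentile() above,
     * minus the sample-count/timeout gating and the stats reset. */
    static int percentile_bucket(const unsigned int *buckets,
                                 unsigned int nr_buckets, unsigned int percentile)
    {
            unsigned int bucket, samples = 0, needed;

            for (bucket = 0; bucket < nr_buckets; bucket++)
                    samples += buckets[bucket];
            if (!samples)
                    return -1;

            needed = (samples * percentile + 99) / 100;     /* DIV_ROUND_UP */
            for (bucket = 0; bucket < nr_buckets - 1; bucket++) {
                    if (buckets[bucket] >= needed)
                            break;
                    needed -= buckets[bucket];
            }
            return bucket;
    }

    /* e.g. counts {10, 20, 40, 20, 5, 3, 1, 1}: the 90th percentile lands in
     * bucket 3 ("<= target"), the 99th in bucket 6 ("<= 1 3/4 * target"). */
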
270 if (depth != kqd->domain_tokens[sched_domain].sb.depth) { in kyber_resize_domain()
271 sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth); in kyber_resize_domain()
272 trace_kyber_adjust(kqd->dev, kyber_domain_names[sched_domain], in kyber_resize_domain()
284 /* Sum all of the per-cpu latency histograms. */ in kyber_timer_fn()
288 cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu); in kyber_timer_fn()
298 * Check if any domains have a high I/O latency, which might indicate in kyber_timer_fn()
324 * necessarily have enough samples to calculate the latency in kyber_timer_fn()
328 * reset it to -1. in kyber_timer_fn()
332 p99 = kqd->domain_p99[sched_domain]; in kyber_timer_fn()
333 kqd->domain_p99[sched_domain] = -1; in kyber_timer_fn()
335 kqd->domain_p99[sched_domain] = p99; in kyber_timer_fn()
341 * If this domain has bad latency, throttle less. Otherwise, in kyber_timer_fn()
344 * The new depth is scaled linearly with the p99 latency vs the in kyber_timer_fn()
345 * latency target. E.g., if the p99 is 3/4 of the target, then in kyber_timer_fn()
350 orig_depth = kqd->domain_tokens[sched_domain].sb.depth; in kyber_timer_fn()
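
The comment above maps the p99 bucket index straight onto a new depth. In the mainline file the depth is then computed as (orig_depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT; that line is not among the matched lines, so treat the exact formula here as an assumption. A worked sketch of the arithmetic:

    #define KYBER_LATENCY_SHIFT 2

    /* p99 is the bucket index from the percentile calculation: bucket 2 means
     * the p99 was <= 3/4 of the target, bucket 7 means it was > 1 3/4 of it. */
    static unsigned int scale_depth(unsigned int orig_depth, unsigned int p99)
    {
            return (orig_depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT;
    }

    /* scale_depth(64, 2) == 48  (p99 at 3/4 of target -> 3/4 of the depth)
     * scale_depth(64, 7) == 128 (p99 well past target -> double the depth) */
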
360 int ret = -ENOMEM; in kyber_queue_data_alloc()
363 kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node); in kyber_queue_data_alloc()
367 kqd->q = q; in kyber_queue_data_alloc()
368 kqd->dev = disk_devt(q->disk); in kyber_queue_data_alloc()
370 kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency, in kyber_queue_data_alloc()
372 if (!kqd->cpu_latency) in kyber_queue_data_alloc()
375 timer_setup(&kqd->timer, kyber_timer_fn, 0); in kyber_queue_data_alloc()
380 ret = sbitmap_queue_init_node(&kqd->domain_tokens[i], in kyber_queue_data_alloc()
381 kyber_depth[i], -1, false, in kyber_queue_data_alloc()
382 GFP_KERNEL, q->node); in kyber_queue_data_alloc()
384 while (--i >= 0) in kyber_queue_data_alloc()
385 sbitmap_queue_free(&kqd->domain_tokens[i]); in kyber_queue_data_alloc()
391 kqd->domain_p99[i] = -1; in kyber_queue_data_alloc()
392 kqd->latency_targets[i] = kyber_latency_targets[i]; in kyber_queue_data_alloc()
398 free_percpu(kqd->cpu_latency); in kyber_queue_data_alloc()
412 return -ENOMEM; in kyber_init_sched()
416 kobject_put(&eq->kobj); in kyber_init_sched()
424 eq->elevator_data = kqd; in kyber_init_sched()
425 q->elevator = eq; in kyber_init_sched()
432 struct kyber_queue_data *kqd = e->elevator_data; in kyber_exit_sched()
435 timer_shutdown_sync(&kqd->timer); in kyber_exit_sched()
436 blk_stat_disable_accounting(kqd->q); in kyber_exit_sched()
439 sbitmap_queue_free(&kqd->domain_tokens[i]); in kyber_exit_sched()
440 free_percpu(kqd->cpu_latency); in kyber_exit_sched()
448 spin_lock_init(&kcq->lock); in kyber_ctx_queue_init()
450 INIT_LIST_HEAD(&kcq->rq_list[i]); in kyber_ctx_queue_init()
455 struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data; in kyber_depth_updated()
456 struct blk_mq_tags *tags = hctx->sched_tags; in kyber_depth_updated()
457 unsigned int shift = tags->bitmap_tags.sb.shift; in kyber_depth_updated()
459 kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; in kyber_depth_updated()
461 sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, kqd->async_depth); in kyber_depth_updated()
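
Worked numbers for the async_depth line above. The 75 for KYBER_ASYNC_PERCENT is its value in the mainline source, quoted here as an assumption since the #define is not among the matched lines: with fully used 64-bit sbitmap words, shift is 6, so async requests may occupy at most 48 of the 64 tags in each word while sync requests can still take all of them.

    #define KYBER_ASYNC_PERCENT 75  /* mainline value, assumed here */

    static unsigned int kyber_async_depth(unsigned int shift)
    {
            return (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
    }

    /* kyber_async_depth(6) == 48: at most 48 async tags per 64-tag sbitmap word,
     * which is the value sbitmap_queue_min_shallow_depth() above registers as
     * the smallest shallow depth callers will pass. */
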
469 khd = kmalloc_node(sizeof(*khd), GFP_KERNEL, hctx->numa_node); in kyber_init_hctx()
471 return -ENOMEM; in kyber_init_hctx()
473 khd->kcqs = kmalloc_array_node(hctx->nr_ctx, in kyber_init_hctx()
475 GFP_KERNEL, hctx->numa_node); in kyber_init_hctx()
476 if (!khd->kcqs) in kyber_init_hctx()
479 for (i = 0; i < hctx->nr_ctx; i++) in kyber_init_hctx()
480 kyber_ctx_queue_init(&khd->kcqs[i]); in kyber_init_hctx()
483 if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx, in kyber_init_hctx()
484 ilog2(8), GFP_KERNEL, hctx->numa_node, in kyber_init_hctx()
486 while (--i >= 0) in kyber_init_hctx()
487 sbitmap_free(&khd->kcq_map[i]); in kyber_init_hctx()
492 spin_lock_init(&khd->lock); in kyber_init_hctx()
495 INIT_LIST_HEAD(&khd->rqs[i]); in kyber_init_hctx()
496 khd->domain_wait[i].sbq = NULL; in kyber_init_hctx()
497 init_waitqueue_func_entry(&khd->domain_wait[i].wait, in kyber_init_hctx()
499 khd->domain_wait[i].wait.private = hctx; in kyber_init_hctx()
500 INIT_LIST_HEAD(&khd->domain_wait[i].wait.entry); in kyber_init_hctx()
501 atomic_set(&khd->wait_index[i], 0); in kyber_init_hctx()
504 khd->cur_domain = 0; in kyber_init_hctx()
505 khd->batching = 0; in kyber_init_hctx()
507 hctx->sched_data = khd; in kyber_init_hctx()
513 kfree(khd->kcqs); in kyber_init_hctx()
516 return -ENOMEM; in kyber_init_hctx()
521 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_exit_hctx()
525 sbitmap_free(&khd->kcq_map[i]); in kyber_exit_hctx()
526 kfree(khd->kcqs); in kyber_exit_hctx()
527 kfree(hctx->sched_data); in kyber_exit_hctx()
532 return (long)rq->elv.priv[0]; in rq_get_domain_token()
537 rq->elv.priv[0] = (void *)(long)token; in rq_set_domain_token()
547 if (nr != -1) { in rq_clear_domain_token()
548 sched_domain = kyber_sched_domain(rq->cmd_flags); in rq_clear_domain_token()
549 sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr, in rq_clear_domain_token()
550 rq->mq_ctx->cpu); in rq_clear_domain_token()
557 * We use the scheduler tags as per-hardware queue queueing tokens. in kyber_limit_depth()
561 struct kyber_queue_data *kqd = data->q->elevator->elevator_data; in kyber_limit_depth()
563 data->shallow_depth = kqd->async_depth; in kyber_limit_depth()
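
For context, the guard around this assignment is not among the matched lines; in mainline only requests that are not synchronous get the shallow depth, roughly as in this reconstruction (treat the exact signature as an assumption; it follows recent kernels, where the op flags are a blk_opf_t):

    static void kyber_limit_depth_sketch(blk_opf_t opf,
                                         struct blk_mq_alloc_data *data)
    {
            /* Sync requests keep the full scheduler tag depth; async ones are
             * capped at kqd->async_depth computed in kyber_depth_updated(). */
            if (!op_is_sync(opf)) {
                    struct kyber_queue_data *kqd = data->q->elevator->elevator_data;

                    data->shallow_depth = kqd->async_depth;
            }
    }
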
571 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); in kyber_bio_merge()
572 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_bio_merge()
573 struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]]; in kyber_bio_merge()
574 unsigned int sched_domain = kyber_sched_domain(bio->bi_opf); in kyber_bio_merge()
575 struct list_head *rq_list = &kcq->rq_list[sched_domain]; in kyber_bio_merge()
578 spin_lock(&kcq->lock); in kyber_bio_merge()
579 merged = blk_bio_list_merge(hctx->queue, rq_list, bio, nr_segs); in kyber_bio_merge()
580 spin_unlock(&kcq->lock); in kyber_bio_merge()
587 rq_set_domain_token(rq, -1); in kyber_prepare_request()
594 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_insert_requests()
598 unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags); in kyber_insert_requests()
599 struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw[hctx->type]]; in kyber_insert_requests()
600 struct list_head *head = &kcq->rq_list[sched_domain]; in kyber_insert_requests()
602 spin_lock(&kcq->lock); in kyber_insert_requests()
605 list_move(&rq->queuelist, head); in kyber_insert_requests()
607 list_move_tail(&rq->queuelist, head); in kyber_insert_requests()
608 sbitmap_set_bit(&khd->kcq_map[sched_domain], in kyber_insert_requests()
609 rq->mq_ctx->index_hw[hctx->type]); in kyber_insert_requests()
610 spin_unlock(&kcq->lock); in kyber_insert_requests()
616 struct kyber_queue_data *kqd = rq->q->elevator->elevator_data; in kyber_finish_request()
623 u64 target, u64 latency) in add_latency_sample() argument
628 if (latency > 0) { in add_latency_sample()
630 bucket = min_t(unsigned int, div64_u64(latency - 1, divisor), in add_latency_sample()
631 KYBER_LATENCY_BUCKETS - 1); in add_latency_sample()
636 atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]); in add_latency_sample()
641 struct kyber_queue_data *kqd = rq->q->elevator->elevator_data; in kyber_completed_request()
646 sched_domain = kyber_sched_domain(rq->cmd_flags); in kyber_completed_request()
650 cpu_latency = get_cpu_ptr(kqd->cpu_latency); in kyber_completed_request()
651 target = kqd->latency_targets[sched_domain]; in kyber_completed_request()
653 target, now - rq->start_time_ns); in kyber_completed_request()
655 now - rq->io_start_time_ns); in kyber_completed_request()
656 put_cpu_ptr(kqd->cpu_latency); in kyber_completed_request()
658 timer_reduce(&kqd->timer, jiffies + HZ / 10); in kyber_completed_request()
670 struct kyber_ctx_queue *kcq = &flush_data->khd->kcqs[bitnr]; in flush_busy_kcq()
672 spin_lock(&kcq->lock); in flush_busy_kcq()
673 list_splice_tail_init(&kcq->rq_list[flush_data->sched_domain], in flush_busy_kcq()
674 flush_data->list); in flush_busy_kcq()
676 spin_unlock(&kcq->lock); in flush_busy_kcq()
691 sbitmap_for_each_set(&khd->kcq_map[sched_domain], in kyber_flush_busy_kcqs()
698 struct blk_mq_hw_ctx *hctx = READ_ONCE(wqe->private); in kyber_domain_wake()
710 unsigned int sched_domain = khd->cur_domain; in kyber_get_domain_token()
711 struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain]; in kyber_get_domain_token()
712 struct sbq_wait *wait = &khd->domain_wait[sched_domain]; in kyber_get_domain_token()
721 * khd->lock, but we still need to be careful about the waker. in kyber_get_domain_token()
723 if (nr < 0 && list_empty_careful(&wait->wait.entry)) { in kyber_get_domain_token()
725 &khd->wait_index[sched_domain]); in kyber_get_domain_token()
726 khd->domain_ws[sched_domain] = ws; in kyber_get_domain_token()
739 * progress. It's possible that the waker already deleted the entry in kyber_get_domain_token()
743 if (nr >= 0 && !list_empty_careful(&wait->wait.entry)) { in kyber_get_domain_token()
744 ws = khd->domain_ws[sched_domain]; in kyber_get_domain_token()
745 spin_lock_irq(&ws->wait.lock); in kyber_get_domain_token()
747 spin_unlock_irq(&ws->wait.lock); in kyber_get_domain_token()
762 rqs = &khd->rqs[khd->cur_domain]; in kyber_dispatch_cur_domain()
769 * khd->lock serializes the flushes, so if we observed any bit set in in kyber_dispatch_cur_domain()
776 khd->batching++; in kyber_dispatch_cur_domain()
778 list_del_init(&rq->queuelist); in kyber_dispatch_cur_domain()
781 trace_kyber_throttled(kqd->dev, in kyber_dispatch_cur_domain()
782 kyber_domain_names[khd->cur_domain]); in kyber_dispatch_cur_domain()
784 } else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) { in kyber_dispatch_cur_domain()
787 kyber_flush_busy_kcqs(khd, khd->cur_domain, rqs); in kyber_dispatch_cur_domain()
789 khd->batching++; in kyber_dispatch_cur_domain()
791 list_del_init(&rq->queuelist); in kyber_dispatch_cur_domain()
794 trace_kyber_throttled(kqd->dev, in kyber_dispatch_cur_domain()
795 kyber_domain_names[khd->cur_domain]); in kyber_dispatch_cur_domain()
805 struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data; in kyber_dispatch_request()
806 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_dispatch_request()
810 spin_lock(&khd->lock); in kyber_dispatch_request()
816 if (khd->batching < kyber_batch_size[khd->cur_domain]) { in kyber_dispatch_request()
831 khd->batching = 0; in kyber_dispatch_request()
833 if (khd->cur_domain == KYBER_NUM_DOMAINS - 1) in kyber_dispatch_request()
834 khd->cur_domain = 0; in kyber_dispatch_request()
836 khd->cur_domain++; in kyber_dispatch_request()
845 spin_unlock(&khd->lock); in kyber_dispatch_request()
851 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_has_work()
855 if (!list_empty_careful(&khd->rqs[i]) || in kyber_has_work()
856 sbitmap_any_bit_set(&khd->kcq_map[i])) in kyber_has_work()
867 struct kyber_queue_data *kqd = e->elevator_data; \
869 return sprintf(page, "%llu\n", kqd->latency_targets[domain]); \
875 struct kyber_queue_data *kqd = e->elevator_data; \
883 kqd->latency_targets[domain] = nsec; \
904 struct kyber_queue_data *kqd = q->elevator->elevator_data; \
906 sbitmap_queue_show(&kqd->domain_tokens[domain], m); \
911 __acquires(&khd->lock) \
913 struct blk_mq_hw_ctx *hctx = m->private; \
914 struct kyber_hctx_data *khd = hctx->sched_data; \
916 spin_lock(&khd->lock); \
917 return seq_list_start(&khd->rqs[domain], *pos); \
923 struct blk_mq_hw_ctx *hctx = m->private; \
924 struct kyber_hctx_data *khd = hctx->sched_data; \
926 return seq_list_next(v, &khd->rqs[domain], pos); \
930 __releases(&khd->lock) \
932 struct blk_mq_hw_ctx *hctx = m->private; \
933 struct kyber_hctx_data *khd = hctx->sched_data; \
935 spin_unlock(&khd->lock); \
948 struct kyber_hctx_data *khd = hctx->sched_data; \
949 wait_queue_entry_t *wait = &khd->domain_wait[domain].wait; \
951 seq_printf(m, "%d\n", !list_empty_careful(&wait->entry)); \
963 struct kyber_queue_data *kqd = q->elevator->elevator_data; in kyber_async_depth_show()
965 seq_printf(m, "%u\n", kqd->async_depth); in kyber_async_depth_show()
972 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_cur_domain_show()
974 seq_printf(m, "%s\n", kyber_domain_names[khd->cur_domain]); in kyber_cur_domain_show()
981 struct kyber_hctx_data *khd = hctx->sched_data; in kyber_batching_show()
983 seq_printf(m, "%u\n", khd->batching); in kyber_batching_show()