Excerpted lines from net/sched/sch_fq.c (Fair Queue packet scheduler)

1 // SPDX-License-Identifier: GPL-2.0-or-later
5 * Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com>
8 * Fast classification depends on skb->sk being set before reaching us.
17 * Transport (e.g. TCP) can set in sk->sk_pacing_rate a rate, enqueue a
22 * - lookup one RB tree (out of 1024 or more) to find the flow.
25 * - Use a special fifo for high prio packets
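
/*
 * Example (not part of the FQ code above): a minimal userspace sketch of
 * how a pacing rate ends up in sk->sk_pacing_rate for FQ to enforce.
 * Besides TCP setting it internally, an application can cap it with
 * SO_MAX_PACING_RATE (bytes per second). This is an assumption-light
 * illustration, not kernel code; the fallback value 47 matches
 * asm-generic/socket.h.
 */
#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_MAX_PACING_RATE
#define SO_MAX_PACING_RATE 47
#endif

static int set_pacing_rate(int fd, unsigned int bytes_per_sec)
{
	return setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
			  &bytes_per_sec, sizeof(bytes_per_sec));
}

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0 || set_pacing_rate(fd, 1250000)) {	/* ~10 Mbit/s */
		perror("SO_MAX_PACING_RATE");
		return 1;
	}
	printf("pacing cap requested\n");
	return 0;
}
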
60 return (struct fq_skb_cb *)qdisc_skb_cb(skb)->data; in fq_skb_cb()
 65 * If packets have monotonically increasing time_to_send, they are placed in O(1)
74 unsigned long age; /* (jiffies | 1UL) when flow was emptied, for gc */
87 struct rb_node rate_node; /* anchor in q->delayed tree */
139 * f->tail and f->age share the same location.
142 * This assumes the low order bit of f->tail is always 0, since alignof(struct sk_buff) >= 2
146 f->age = jiffies | 1UL; in fq_flow_set_detached()
151 return !!(f->age & 1UL); in fq_flow_is_detached()
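
/*
 * Example (not part of the FQ code above): a standalone sketch of the
 * tagging trick used by fq_flow_set_detached()/fq_flow_is_detached().
 * A pointer and a jiffies-like timestamp share one word; because the
 * pointer is at least 2-byte aligned its low bit is always 0, so an odd
 * value unambiguously means "detached since <time>". Names here are
 * illustrative only.
 */
#include <assert.h>

struct item {
	int v;				/* alignof(struct item) >= 2 */
};

union slot {
	struct item *tail;		/* low bit 0: live tail pointer */
	unsigned long age;		/* low bit 1: detach timestamp  */
};

static void slot_set_detached(union slot *s, unsigned long now)
{
	s->age = now | 1UL;		/* force the low bit to 1 */
}

static int slot_is_detached(const union slot *s)
{
	return s->age & 1UL;
}

int main(void)
{
	struct item it;
	union slot s;

	s.tail = &it;
	assert(!slot_is_detached(&s));
	slot_set_detached(&s, 12345UL);
	assert(slot_is_detached(&s));
	return 0;
}
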
159 return f->next == &throttled; in fq_flow_is_throttled()
164 if (head->first) in fq_flow_add_tail()
165 head->last->next = flow; in fq_flow_add_tail()
167 head->first = flow; in fq_flow_add_tail()
168 head->last = flow; in fq_flow_add_tail()
169 flow->next = NULL; in fq_flow_add_tail()
174 rb_erase(&f->rate_node, &q->delayed); in fq_flow_unset_throttled()
175 q->throttled_flows--; in fq_flow_unset_throttled()
176 fq_flow_add_tail(&q->old_flows, f); in fq_flow_unset_throttled()
181 struct rb_node **p = &q->delayed.rb_node, *parent = NULL; in fq_flow_set_throttled()
188 if (f->time_next_packet >= aux->time_next_packet) in fq_flow_set_throttled()
189 p = &parent->rb_right; in fq_flow_set_throttled()
191 p = &parent->rb_left; in fq_flow_set_throttled()
193 rb_link_node(&f->rate_node, parent, p); in fq_flow_set_throttled()
194 rb_insert_color(&f->rate_node, &q->delayed); in fq_flow_set_throttled()
195 q->throttled_flows++; in fq_flow_set_throttled()
196 q->stat_throttled++; in fq_flow_set_throttled()
198 f->next = &throttled; in fq_flow_set_throttled()
199 if (q->time_next_delayed_flow > f->time_next_packet) in fq_flow_set_throttled()
200 q->time_next_delayed_flow = f->time_next_packet; in fq_flow_set_throttled()
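
/*
 * Example (not part of the FQ code above): fq_flow_set_throttled() keeps
 * throttled flows in a tree sorted by time_next_packet, but it also
 * caches the earliest deadline in time_next_delayed_flow so the watchdog
 * can be armed without walking the tree (see fq_dequeue() below). A
 * simplified sketch of that "cached minimum" pattern, with an unsorted
 * array standing in for the rbtree; names are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

#define NO_DEADLINE (~0ULL)

struct throttled {
	uint64_t deadline[16];
	int count;
	uint64_t next_deadline;		/* cached minimum */
};

static void throttle(struct throttled *t, uint64_t when)
{
	t->deadline[t->count++] = when;
	if (t->next_deadline > when)	/* keep the cache up to date */
		t->next_deadline = when;
}

int main(void)
{
	struct throttled t = { .next_deadline = NO_DEADLINE };

	throttle(&t, 3000);
	throttle(&t, 1000);
	throttle(&t, 2000);
	printf("arm timer for %llu ns\n",
	       (unsigned long long)t.next_deadline);	/* prints 1000 */
	return 0;
}
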
214 time_after(jiffies, f->age + FQ_GC_AGE); in fq_gc_candidate()
226 p = &root->rb_node; in fq_gc()
232 if (f->sk == sk) in fq_gc()
241 if (f->sk > sk) in fq_gc()
242 p = &parent->rb_right; in fq_gc()
244 p = &parent->rb_left; in fq_gc()
251 f = tofree[--i]; in fq_gc()
252 rb_erase(&f->fq_node, root); in fq_gc()
254 q->flows -= fcnt; in fq_gc()
255 q->inactive_flows -= fcnt; in fq_gc()
256 q->stat_gc_flows += fcnt; in fq_gc()
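
/*
 * Example (not part of the FQ code above): the gc-candidate test in
 * fq_gc_candidate() relies on time_after(), which stays correct when
 * jiffies wraps around. A sketch of the wraparound-safe comparison,
 * mirroring the core of include/linux/jiffies.h (the real macro also
 * typechecks its arguments).
 */
#include <assert.h>

#define time_after(a, b)	((long)((b) - (a)) < 0)

int main(void)
{
	unsigned long almost_wrap = ~0UL - 5;	/* just before wraparound */
	unsigned long wrapped = 10;		/* just after wraparound  */

	/* a naive '>' would call almost_wrap "later"; time_after() is right */
	assert(time_after(wrapped, almost_wrap));
	assert(!time_after(almost_wrap, wrapped));
	return 0;
}
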
264 struct sock *sk = skb->sk; in fq_classify()
269 if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) in fq_classify()
270 return &q->internal; in fq_classify()
274 * 1) request sockets are not full blown, in fq_classify()
282 unsigned long hash = skb_get_hash(skb) & q->orphan_mask; in fq_classify()
284 /* By forcing low order bit to 1, we make sure to not in fq_classify()
287 sk = (struct sock *)((hash << 1) | 1UL); in fq_classify()
289 } else if (sk->sk_state == TCP_CLOSE) { in fq_classify()
290 unsigned long hash = skb_get_hash(skb) & q->orphan_mask; in fq_classify()
299 sk = (struct sock *)((hash << 1) | 1UL); in fq_classify()
302 root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)]; in fq_classify()
304 if (q->flows >= (2U << q->fq_trees_log) && in fq_classify()
305 q->inactive_flows > q->flows/2) in fq_classify()
308 p = &root->rb_node; in fq_classify()
314 if (f->sk == sk) { in fq_classify()
320 if (unlikely(skb->sk == sk && in fq_classify()
321 f->socket_hash != sk->sk_hash)) { in fq_classify()
322 f->credit = q->initial_quantum; in fq_classify()
323 f->socket_hash = sk->sk_hash; in fq_classify()
324 if (q->rate_enable) in fq_classify()
325 smp_store_release(&sk->sk_pacing_status, in fq_classify()
329 f->time_next_packet = 0ULL; in fq_classify()
333 if (f->sk > sk) in fq_classify()
334 p = &parent->rb_right; in fq_classify()
336 p = &parent->rb_left; in fq_classify()
341 q->stat_allocation_errors++; in fq_classify()
342 return &q->internal; in fq_classify()
344 /* f->t_root is already zeroed after kmem_cache_zalloc() */ in fq_classify()
347 f->sk = sk; in fq_classify()
348 if (skb->sk == sk) { in fq_classify()
349 f->socket_hash = sk->sk_hash; in fq_classify()
350 if (q->rate_enable) in fq_classify()
351 smp_store_release(&sk->sk_pacing_status, in fq_classify()
354 f->credit = q->initial_quantum; in fq_classify()
356 rb_link_node(&f->fq_node, parent, p); in fq_classify()
357 rb_insert_color(&f->fq_node, root); in fq_classify()
359 q->flows++; in fq_classify()
360 q->inactive_flows++; in fq_classify()
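
/*
 * Example (not part of the FQ code above): standalone sketch of the
 * pseudo-key trick used by fq_classify() for packets without a usable
 * socket. The flow hash is masked with orphan_mask (so orphans share at
 * most orphan_mask + 1 flows), then shifted and forced odd; since real
 * struct sock pointers are aligned (even), a pseudo key can never
 * collide with one in the rbtree. Names are illustrative.
 */
#include <assert.h>
#include <stdint.h>

static uintptr_t pseudo_sk(uint32_t flow_hash, unsigned long orphan_mask)
{
	unsigned long hash = flow_hash & orphan_mask;

	return (hash << 1) | 1UL;	/* always odd */
}

int main(void)
{
	long real_socket;		/* stand-in for a struct sock */
	uintptr_t key = pseudo_sk(0xdeadbeef, 1024 - 1);

	assert(key & 1UL);				/* odd          */
	assert(key != (uintptr_t)&real_socket);		/* pointers are even */
	return 0;
}
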
366 struct sk_buff *skb = skb_rb_first(&flow->t_root); in fq_peek()
367 struct sk_buff *head = flow->head; in fq_peek()
375 if (fq_skb_cb(skb)->time_to_send < fq_skb_cb(head)->time_to_send) in fq_peek()
383 if (skb == flow->head) { in fq_erase_head()
384 flow->head = skb->next; in fq_erase_head()
386 rb_erase(&skb->rbnode, &flow->t_root); in fq_erase_head()
387 skb->dev = qdisc_dev(sch); in fq_erase_head()
399 flow->qlen--; in fq_dequeue_skb()
401 sch->q.qlen--; in fq_dequeue_skb()
409 head = flow->head; in flow_queue_add()
411 fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) { in flow_queue_add()
413 flow->head = skb; in flow_queue_add()
415 flow->tail->next = skb; in flow_queue_add()
416 flow->tail = skb; in flow_queue_add()
417 skb->next = NULL; in flow_queue_add()
421 p = &flow->t_root.rb_node; in flow_queue_add()
427 if (fq_skb_cb(skb)->time_to_send >= fq_skb_cb(aux)->time_to_send) in flow_queue_add()
428 p = &parent->rb_right; in flow_queue_add()
430 p = &parent->rb_left; in flow_queue_add()
432 rb_link_node(&skb->rbnode, parent, p); in flow_queue_add()
433 rb_insert_color(&skb->rbnode, &flow->t_root); in flow_queue_add()
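
/*
 * Example (not part of the FQ code above): simplified sketch of the
 * queueing scheme in flow_queue_add()/fq_peek(), with a sorted list
 * standing in for the rbtree. Packets whose send time keeps increasing
 * are appended to the FIFO in O(1); only out-of-order packets pay for a
 * sorted insert, and peek returns whichever head is earlier. Names are
 * illustrative.
 */
#include <stdint.h>
#include <stdio.h>

struct pkt {
	uint64_t time_to_send;
	struct pkt *next;
};

struct flowq {
	struct pkt *head, *tail;	/* in-order FIFO            */
	struct pkt *ooo;		/* sorted "tree" substitute */
};

static void flowq_add(struct flowq *q, struct pkt *p)
{
	if (!q->tail || p->time_to_send >= q->tail->time_to_send) {
		/* common case: append to the FIFO */
		p->next = NULL;
		if (q->tail)
			q->tail->next = p;
		else
			q->head = p;
		q->tail = p;
		return;
	}
	/* out of order: sorted insert (an rbtree in the real code) */
	{
		struct pkt **pp = &q->ooo;

		while (*pp && (*pp)->time_to_send <= p->time_to_send)
			pp = &(*pp)->next;
		p->next = *pp;
		*pp = p;
	}
}

static struct pkt *flowq_peek(struct flowq *q)
{
	if (!q->ooo)
		return q->head;
	if (!q->head || q->ooo->time_to_send < q->head->time_to_send)
		return q->ooo;
	return q->head;
}

int main(void)
{
	struct pkt a = { .time_to_send = 100 };
	struct pkt b = { .time_to_send = 200 };
	struct pkt c = { .time_to_send = 150 };	/* out of order */
	struct flowq q = { 0 };

	flowq_add(&q, &a);
	flowq_add(&q, &b);
	flowq_add(&q, &c);
	printf("next: %llu\n",
	       (unsigned long long)flowq_peek(&q)->time_to_send); /* 100 */
	return 0;
}
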
439 return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon)); in fq_packet_beyond_horizon()
448 if (unlikely(sch->q.qlen >= sch->limit)) in fq_enqueue()
451 if (!skb->tstamp) { in fq_enqueue()
452 fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns(); in fq_enqueue()
460 q->ktime_cache = ktime_get_ns(); in fq_enqueue()
462 if (q->horizon_drop) { in fq_enqueue()
463 q->stat_horizon_drops++; in fq_enqueue()
466 q->stat_horizon_caps++; in fq_enqueue()
467 skb->tstamp = q->ktime_cache + q->horizon; in fq_enqueue()
470 fq_skb_cb(skb)->time_to_send = skb->tstamp; in fq_enqueue()
474 if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) { in fq_enqueue()
475 q->stat_flows_plimit++; in fq_enqueue()
479 f->qlen++; in fq_enqueue()
482 fq_flow_add_tail(&q->new_flows, f); in fq_enqueue()
483 if (time_after(jiffies, f->age + q->flow_refill_delay)) in fq_enqueue()
484 f->credit = max_t(u32, f->credit, q->quantum); in fq_enqueue()
485 q->inactive_flows--; in fq_enqueue()
488 /* Note: this overwrites f->age */ in fq_enqueue()
491 if (unlikely(f == &q->internal)) { in fq_enqueue()
492 q->stat_internal_packets++; in fq_enqueue()
494 sch->q.qlen++; in fq_enqueue()
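
/*
 * Example (not part of the FQ code above): simplified sketch of the
 * horizon policy applied in fq_enqueue(). A packet whose requested
 * departure time lies more than 'horizon' nanoseconds in the future is
 * either dropped (horizon_drop) or has its departure time capped to
 * now + horizon. Userspace types and names are illustrative.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct horizon {
	uint64_t horizon_ns;
	bool drop;			/* true: drop, false: cap */
};

/* Returns true if the packet must be dropped; may clamp *tstamp in place. */
static bool apply_horizon(const struct horizon *h, uint64_t now,
			  uint64_t *tstamp)
{
	if ((int64_t)(*tstamp - (now + h->horizon_ns)) <= 0)
		return false;			/* within the horizon */
	if (h->drop)
		return true;			/* too far in the future */
	*tstamp = now + h->horizon_ns;		/* cap instead of dropping */
	return false;
}

int main(void)
{
	struct horizon h = { .horizon_ns = 10ULL * 1000000000, .drop = false };
	uint64_t now = 0, t = 25ULL * 1000000000;	/* asked for 25 s ahead */

	if (!apply_horizon(&h, now, &t))
		printf("send at %llu ns\n", (unsigned long long)t); /* 10e9 */
	return 0;
}
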
504 if (q->time_next_delayed_flow > now) in fq_check_throttled()
508 * This is cheap and can help diagnose timer/latency problems. in fq_check_throttled()
510 sample = (unsigned long)(now - q->time_next_delayed_flow); in fq_check_throttled()
511 q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3; in fq_check_throttled()
512 q->unthrottle_latency_ns += sample >> 3; in fq_check_throttled()
514 q->time_next_delayed_flow = ~0ULL; in fq_check_throttled()
515 while ((p = rb_first(&q->delayed)) != NULL) { in fq_check_throttled()
518 if (f->time_next_packet > now) { in fq_check_throttled()
519 q->time_next_delayed_flow = f->time_next_packet; in fq_check_throttled()
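
/*
 * Example (not part of the FQ code above): the unthrottle-latency
 * estimator in fq_check_throttled() is a fixed-point EWMA with weight
 * 1/8, i.e. avg = avg - avg/8 + sample/8. A standalone sketch:
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t ewma_1_8(uint64_t avg, uint64_t sample)
{
	avg -= avg >> 3;	/* drop 1/8 of the old average      */
	avg += sample >> 3;	/* blend in 1/8 of the new sample   */
	return avg;
}

int main(void)
{
	uint64_t avg = 0;
	int i;

	for (i = 0; i < 32; i++)
		avg = ewma_1_8(avg, 8000);	/* steady 8 us samples */
	printf("avg ~= %llu ns\n", (unsigned long long)avg); /* approaches 8000 */
	return 0;
}
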
536 if (!sch->q.qlen) in fq_dequeue()
539 skb = fq_peek(&q->internal); in fq_dequeue()
541 fq_dequeue_skb(sch, &q->internal, skb); in fq_dequeue()
545 q->ktime_cache = now = ktime_get_ns(); in fq_dequeue()
548 head = &q->new_flows; in fq_dequeue()
549 if (!head->first) { in fq_dequeue()
550 head = &q->old_flows; in fq_dequeue()
551 if (!head->first) { in fq_dequeue()
552 if (q->time_next_delayed_flow != ~0ULL) in fq_dequeue()
553 qdisc_watchdog_schedule_range_ns(&q->watchdog, in fq_dequeue()
554 q->time_next_delayed_flow, in fq_dequeue()
555 q->timer_slack); in fq_dequeue()
559 f = head->first; in fq_dequeue()
561 if (f->credit <= 0) { in fq_dequeue()
562 f->credit += q->quantum; in fq_dequeue()
563 head->first = f->next; in fq_dequeue()
564 fq_flow_add_tail(&q->old_flows, f); in fq_dequeue()
570 u64 time_next_packet = max_t(u64, fq_skb_cb(skb)->time_to_send, in fq_dequeue()
571 f->time_next_packet); in fq_dequeue()
574 head->first = f->next; in fq_dequeue()
575 f->time_next_packet = time_next_packet; in fq_dequeue()
579 prefetch(&skb->end); in fq_dequeue()
580 if ((s64)(now - time_next_packet - q->ce_threshold) > 0) { in fq_dequeue()
582 q->stat_ce_mark++; in fq_dequeue()
586 head->first = f->next; in fq_dequeue()
588 if ((head == &q->new_flows) && q->old_flows.first) { in fq_dequeue()
589 fq_flow_add_tail(&q->old_flows, f); in fq_dequeue()
592 q->inactive_flows++; in fq_dequeue()
597 f->credit -= plen; in fq_dequeue()
599 if (!q->rate_enable) in fq_dequeue()
602 rate = q->flow_max_rate; in fq_dequeue()
605 * update f->time_next_packet only if this qdisc enforces in fq_dequeue()
608 if (!skb->tstamp) { in fq_dequeue()
609 if (skb->sk) in fq_dequeue()
610 rate = min(skb->sk->sk_pacing_rate, rate); in fq_dequeue()
612 if (rate <= q->low_rate_threshold) { in fq_dequeue()
613 f->credit = 0; in fq_dequeue()
615 plen = max(plen, q->quantum); in fq_dequeue()
616 if (f->credit > 0) in fq_dequeue()
626 * clamp the delay to 1 second. in fq_dequeue()
631 q->stat_pkts_too_long++; in fq_dequeue()
634 * f->time_next_packet was set when prior packet was sent, in fq_dequeue()
637 if (f->time_next_packet) in fq_dequeue()
638 len -= min(len/2, now - f->time_next_packet); in fq_dequeue()
639 f->time_next_packet = now + len; in fq_dequeue()
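
/*
 * Example (not part of the FQ code above): simplified sketch of the
 * pacing math at the end of fq_dequeue(). The flow's next departure
 * time advances by plen / rate seconds, clamped to 1 second, and at
 * most half of that delay is forgiven to absorb timer/scheduling drift.
 * The real code uses div64_ul() and min(); names here are illustrative,
 * and now >= prev_next is assumed (otherwise the flow would have been
 * throttled before reaching this point).
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL

static uint64_t next_departure(uint64_t now, uint64_t prev_next,
			       uint32_t plen, uint64_t rate_Bps)
{
	uint64_t len = (uint64_t)plen * NSEC_PER_SEC;

	if (rate_Bps)
		len /= rate_Bps;
	if (len > NSEC_PER_SEC)		/* clamp the delay to 1 second */
		len = NSEC_PER_SEC;
	if (prev_next) {		/* forgive at most half for drift */
		uint64_t late = now - prev_next;

		len -= late < len / 2 ? late : len / 2;
	}
	return now + len;
}

int main(void)
{
	/* a 1500-byte packet at 1 MB/s => 1.5 ms between departures */
	printf("%llu ns\n",
	       (unsigned long long)next_departure(0, 0, 1500, 1000000));
	return 0;
}
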
648 struct rb_node *p = rb_first(&flow->t_root); in fq_flow_purge()
654 rb_erase(&skb->rbnode, &flow->t_root); in fq_flow_purge()
657 rtnl_kfree_skbs(flow->head, flow->tail); in fq_flow_purge()
658 flow->head = NULL; in fq_flow_purge()
659 flow->qlen = 0; in fq_flow_purge()
670 sch->q.qlen = 0; in fq_reset()
671 sch->qstats.backlog = 0; in fq_reset()
673 fq_flow_purge(&q->internal); in fq_reset()
675 if (!q->fq_root) in fq_reset()
678 for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { in fq_reset()
679 root = &q->fq_root[idx]; in fq_reset()
689 q->new_flows.first = NULL; in fq_reset()
690 q->old_flows.first = NULL; in fq_reset()
691 q->delayed = RB_ROOT; in fq_reset()
692 q->flows = 0; in fq_reset()
693 q->inactive_flows = 0; in fq_reset()
694 q->throttled_flows = 0; in fq_reset()
707 for (idx = 0; idx < (1U << old_log); idx++) { in fq_rehash()
717 nroot = &new_array[hash_ptr(of->sk, new_log)]; in fq_rehash()
719 np = &nroot->rb_node; in fq_rehash()
725 BUG_ON(nf->sk == of->sk); in fq_rehash()
727 if (nf->sk > of->sk) in fq_rehash()
728 np = &parent->rb_right; in fq_rehash()
730 np = &parent->rb_left; in fq_rehash()
733 rb_link_node(&of->fq_node, parent, np); in fq_rehash()
734 rb_insert_color(&of->fq_node, nroot); in fq_rehash()
737 q->flows -= fcnt; in fq_rehash()
738 q->inactive_flows -= fcnt; in fq_rehash()
739 q->stat_gc_flows += fcnt; in fq_rehash()
754 if (q->fq_root && log == q->fq_trees_log) in fq_resize()
757 /* If XPS was set up, we can allocate memory on the right NUMA node */ in fq_resize()
759 netdev_queue_numa_node_read(sch->dev_queue)); in fq_resize()
761 return -ENOMEM; in fq_resize()
763 for (idx = 0; idx < (1U << log); idx++) in fq_resize()
768 old_fq_root = q->fq_root; in fq_resize()
770 fq_rehash(q, old_fq_root, q->fq_trees_log, array, log); in fq_resize()
772 q->fq_root = array; in fq_resize()
773 q->fq_trees_log = log; in fq_resize()
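
/*
 * Example (not part of the FQ code above): simplified sketch of the
 * resize-and-rehash pattern in fq_resize()/fq_rehash(), using chained
 * lists instead of rbtrees. The table size stays a power of two
 * (1 << log), so each entry is simply relinked into the bucket chosen
 * by its hash under the new log. Names are illustrative.
 */
#include <stdio.h>
#include <stdlib.h>

struct node {
	unsigned long key;
	struct node *next;
};

struct table {
	struct node **bucket;
	unsigned int log;		/* table has 1U << log buckets */
};

static unsigned int hash_bucket(unsigned long key, unsigned int log)
{
	/* the real code hashes a socket pointer with hash_ptr(sk, log) */
	return (unsigned int)(key & ((1UL << log) - 1));
}

static int table_resize(struct table *t, unsigned int new_log)
{
	struct node **nb = calloc(1UL << new_log, sizeof(*nb));
	unsigned int i;

	if (!nb)
		return -1;
	for (i = 0; i < (1U << t->log); i++) {
		struct node *p = t->bucket[i];

		while (p) {	/* relink every node into its new bucket */
			struct node *next = p->next;
			unsigned int b = hash_bucket(p->key, new_log);

			p->next = nb[b];
			nb[b] = p;
			p = next;
		}
	}
	free(t->bucket);
	t->bucket = nb;
	t->log = new_log;
	return 0;
}

int main(void)
{
	struct node n1 = { .key = 5 }, n2 = { .key = 6 };
	struct table t = { .log = 1 };

	t.bucket = calloc(1UL << t.log, sizeof(*t.bucket));
	if (!t.bucket)
		return 1;
	t.bucket[hash_bucket(n1.key, t.log)] = &n1;
	t.bucket[hash_bucket(n2.key, t.log)] = &n2;

	if (table_resize(&t, 2))
		return 1;
	printf("now %u buckets\n", 1U << t.log);
	free(t.bucket);
	return 0;
}
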
786 static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
810 struct nlattr *tb[TCA_FQ_MAX + 1]; in fq_change()
822 fq_log = q->fq_trees_log; in fq_change()
827 if (nval >= 1 && nval <= ilog2(256*1024)) in fq_change()
830 err = -EINVAL; in fq_change()
833 sch->limit = nla_get_u32(tb[TCA_FQ_PLIMIT]); in fq_change()
836 q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]); in fq_change()
841 if (quantum > 0 && quantum <= (1 << 20)) { in fq_change()
842 q->quantum = quantum; in fq_change()
845 err = -EINVAL; in fq_change()
850 q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); in fq_change()
859 q->flow_max_rate = (rate == ~0U) ? ~0UL : rate; in fq_change()
862 q->low_rate_threshold = in fq_change()
868 if (enable <= 1) in fq_change()
869 q->rate_enable = enable; in fq_change()
871 err = -EINVAL; in fq_change()
877 q->flow_refill_delay = usecs_to_jiffies(usecs_delay); in fq_change()
881 q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]); in fq_change()
884 q->ce_threshold = (u64)NSEC_PER_USEC * in fq_change()
888 q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]); in fq_change()
891 q->horizon = (u64)NSEC_PER_USEC * in fq_change()
895 q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]); in fq_change()
903 while (sch->q.qlen > sch->limit) { in fq_change()
923 fq_free(q->fq_root); in fq_destroy()
924 qdisc_watchdog_cancel(&q->watchdog); in fq_destroy()
933 sch->limit = 10000; in fq_init()
934 q->flow_plimit = 100; in fq_init()
935 q->quantum = 2 * psched_mtu(qdisc_dev(sch)); in fq_init()
936 q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)); in fq_init()
937 q->flow_refill_delay = msecs_to_jiffies(40); in fq_init()
938 q->flow_max_rate = ~0UL; in fq_init()
939 q->time_next_delayed_flow = ~0ULL; in fq_init()
940 q->rate_enable = 1; in fq_init()
941 q->new_flows.first = NULL; in fq_init()
942 q->old_flows.first = NULL; in fq_init()
943 q->delayed = RB_ROOT; in fq_init()
944 q->fq_root = NULL; in fq_init()
945 q->fq_trees_log = ilog2(1024); in fq_init()
946 q->orphan_mask = 1024 - 1; in fq_init()
947 q->low_rate_threshold = 550000 / 8; in fq_init()
949 q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */ in fq_init()
951 q->horizon = 10ULL * NSEC_PER_SEC; /* 10 seconds */ in fq_init()
952 q->horizon_drop = 1; /* by default, drop packets beyond horizon */ in fq_init()
955 q->ce_threshold = (u64)NSEC_PER_USEC * ~0U; in fq_init()
957 qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC); in fq_init()
962 err = fq_resize(sch, q->fq_trees_log); in fq_init()
970 u64 ce_threshold = q->ce_threshold; in fq_dump()
971 u64 horizon = q->horizon; in fq_dump()
983 if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) || in fq_dump()
984 nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) || in fq_dump()
985 nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) || in fq_dump()
986 nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) || in fq_dump()
987 nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) || in fq_dump()
989 min_t(unsigned long, q->flow_max_rate, ~0U)) || in fq_dump()
991 jiffies_to_usecs(q->flow_refill_delay)) || in fq_dump()
992 nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || in fq_dump()
994 q->low_rate_threshold) || in fq_dump()
996 nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) || in fq_dump()
997 nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) || in fq_dump()
999 nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop)) in fq_dump()
1005 return -1; in fq_dump()
1015 st.gc_flows = q->stat_gc_flows; in fq_dump_stats()
1016 st.highprio_packets = q->stat_internal_packets; in fq_dump_stats()
1018 st.throttled = q->stat_throttled; in fq_dump_stats()
1019 st.flows_plimit = q->stat_flows_plimit; in fq_dump_stats()
1020 st.pkts_too_long = q->stat_pkts_too_long; in fq_dump_stats()
1021 st.allocation_errors = q->stat_allocation_errors; in fq_dump_stats()
1022 st.time_next_delayed_flow = q->time_next_delayed_flow + q->timer_slack - in fq_dump_stats()
1024 st.flows = q->flows; in fq_dump_stats()
1025 st.inactive_flows = q->inactive_flows; in fq_dump_stats()
1026 st.throttled_flows = q->throttled_flows; in fq_dump_stats()
1028 q->unthrottle_latency_ns, ~0U); in fq_dump_stats()
1029 st.ce_mark = q->stat_ce_mark; in fq_dump_stats()
1030 st.horizon_drops = q->stat_horizon_drops; in fq_dump_stats()
1031 st.horizon_caps = q->stat_horizon_caps; in fq_dump_stats()
1061 return -ENOMEM; in fq_module_init()