1 /* 2 * Copyright (c) 2003 Patrick McHardy, <kaber@trash.net> 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 2 7 * of the License, or (at your option) any later version. 8 * 9 * 2003-10-17 - Ported from altq 10 */ 11 /* 12 * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved. 13 * 14 * Permission to use, copy, modify, and distribute this software and 15 * its documentation is hereby granted (including for commercial or 16 * for-profit use), provided that both the copyright notice and this 17 * permission notice appear in all copies of the software, derivative 18 * works, or modified versions, and any portions thereof. 19 * 20 * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF 21 * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS 22 * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED 23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 25 * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 28 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 32 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 33 * DAMAGE. 34 * 35 * Carnegie Mellon encourages (but does not require) users of this 36 * software to return any improvements or extensions that they make, 37 * and to grant Carnegie Mellon the rights to redistribute these 38 * changes without encumbrance. 
*/
/*
 * H-FSC is described in Proceedings of SIGCOMM'97,
 * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
 * Real-Time and Priority Service"
 * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
 *
 * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
 * when a class has an upperlimit, the fit-time is computed from the
 * upperlimit service curve.  the link-sharing scheduler does not schedule
 * a class whose fit-time exceeds the current time.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/math64.h>
#include <linux/rbtree.h>
#include <linux/init.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <asm/div64.h>

/*
 * kernel internal service curve representation:
 *   coordinates are given by 64 bit unsigned integers.
 *   x-axis: unit is clock count.
 *   y-axis: unit is byte.
 *
 * The service curve parameters are converted to the internal
 * representation. The slope values are scaled to avoid overflow.
 * the inverse slope values as well as the y-projection of the 1st
 * segment are kept in order to avoid 64-bit divide operations
 * that are expensive on 32-bit architectures.
82 */ 83 84 struct internal_sc { 85 u64 sm1; /* scaled slope of the 1st segment */ 86 u64 ism1; /* scaled inverse-slope of the 1st segment */ 87 u64 dx; /* the x-projection of the 1st segment */ 88 u64 dy; /* the y-projection of the 1st segment */ 89 u64 sm2; /* scaled slope of the 2nd segment */ 90 u64 ism2; /* scaled inverse-slope of the 2nd segment */ 91 }; 92 93 /* runtime service curve */ 94 struct runtime_sc { 95 u64 x; /* current starting position on x-axis */ 96 u64 y; /* current starting position on y-axis */ 97 u64 sm1; /* scaled slope of the 1st segment */ 98 u64 ism1; /* scaled inverse-slope of the 1st segment */ 99 u64 dx; /* the x-projection of the 1st segment */ 100 u64 dy; /* the y-projection of the 1st segment */ 101 u64 sm2; /* scaled slope of the 2nd segment */ 102 u64 ism2; /* scaled inverse-slope of the 2nd segment */ 103 }; 104 105 enum hfsc_class_flags { 106 HFSC_RSC = 0x1, 107 HFSC_FSC = 0x2, 108 HFSC_USC = 0x4 109 }; 110 111 struct hfsc_class { 112 struct Qdisc_class_common cl_common; 113 unsigned int refcnt; /* usage count */ 114 115 struct gnet_stats_basic_packed bstats; 116 struct gnet_stats_queue qstats; 117 struct net_rate_estimator __rcu *rate_est; 118 struct tcf_proto __rcu *filter_list; /* filter list */ 119 struct tcf_block *block; 120 unsigned int filter_cnt; /* filter count */ 121 unsigned int level; /* class level in hierarchy */ 122 123 struct hfsc_sched *sched; /* scheduler data */ 124 struct hfsc_class *cl_parent; /* parent class */ 125 struct list_head siblings; /* sibling classes */ 126 struct list_head children; /* child classes */ 127 struct Qdisc *qdisc; /* leaf qdisc */ 128 129 struct rb_node el_node; /* qdisc's eligible tree member */ 130 struct rb_root vt_tree; /* active children sorted by cl_vt */ 131 struct rb_node vt_node; /* parent's vt_tree member */ 132 struct rb_root cf_tree; /* active children sorted by cl_f */ 133 struct rb_node cf_node; /* parent's cf_heap member */ 134 135 u64 cl_total; /* total work in 
bytes */ 136 u64 cl_cumul; /* cumulative work in bytes done by 137 real-time criteria */ 138 139 u64 cl_d; /* deadline*/ 140 u64 cl_e; /* eligible time */ 141 u64 cl_vt; /* virtual time */ 142 u64 cl_f; /* time when this class will fit for 143 link-sharing, max(myf, cfmin) */ 144 u64 cl_myf; /* my fit-time (calculated from this 145 class's own upperlimit curve) */ 146 u64 cl_cfmin; /* earliest children's fit-time (used 147 with cl_myf to obtain cl_f) */ 148 u64 cl_cvtmin; /* minimal virtual time among the 149 children fit for link-sharing 150 (monotonic within a period) */ 151 u64 cl_vtadj; /* intra-period cumulative vt 152 adjustment */ 153 u64 cl_cvtoff; /* largest virtual time seen among 154 the children */ 155 156 struct internal_sc cl_rsc; /* internal real-time service curve */ 157 struct internal_sc cl_fsc; /* internal fair service curve */ 158 struct internal_sc cl_usc; /* internal upperlimit service curve */ 159 struct runtime_sc cl_deadline; /* deadline curve */ 160 struct runtime_sc cl_eligible; /* eligible curve */ 161 struct runtime_sc cl_virtual; /* virtual curve */ 162 struct runtime_sc cl_ulimit; /* upperlimit curve */ 163 164 u8 cl_flags; /* which curves are valid */ 165 u32 cl_vtperiod; /* vt period sequence number */ 166 u32 cl_parentperiod;/* parent's vt period sequence number*/ 167 u32 cl_nactive; /* number of active children */ 168 }; 169 170 struct hfsc_sched { 171 u16 defcls; /* default class id */ 172 struct hfsc_class root; /* root class */ 173 struct Qdisc_class_hash clhash; /* class hash */ 174 struct rb_root eligible; /* eligible tree */ 175 struct qdisc_watchdog watchdog; /* watchdog timer */ 176 }; 177 178 #define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */ 179 180 181 /* 182 * eligible tree holds backlogged classes being sorted by their eligible times. 183 * there is one eligible tree per hfsc instance. 
184 */ 185 186 static void 187 eltree_insert(struct hfsc_class *cl) 188 { 189 struct rb_node **p = &cl->sched->eligible.rb_node; 190 struct rb_node *parent = NULL; 191 struct hfsc_class *cl1; 192 193 while (*p != NULL) { 194 parent = *p; 195 cl1 = rb_entry(parent, struct hfsc_class, el_node); 196 if (cl->cl_e >= cl1->cl_e) 197 p = &parent->rb_right; 198 else 199 p = &parent->rb_left; 200 } 201 rb_link_node(&cl->el_node, parent, p); 202 rb_insert_color(&cl->el_node, &cl->sched->eligible); 203 } 204 205 static inline void 206 eltree_remove(struct hfsc_class *cl) 207 { 208 rb_erase(&cl->el_node, &cl->sched->eligible); 209 } 210 211 static inline void 212 eltree_update(struct hfsc_class *cl) 213 { 214 eltree_remove(cl); 215 eltree_insert(cl); 216 } 217 218 /* find the class with the minimum deadline among the eligible classes */ 219 static inline struct hfsc_class * 220 eltree_get_mindl(struct hfsc_sched *q, u64 cur_time) 221 { 222 struct hfsc_class *p, *cl = NULL; 223 struct rb_node *n; 224 225 for (n = rb_first(&q->eligible); n != NULL; n = rb_next(n)) { 226 p = rb_entry(n, struct hfsc_class, el_node); 227 if (p->cl_e > cur_time) 228 break; 229 if (cl == NULL || p->cl_d < cl->cl_d) 230 cl = p; 231 } 232 return cl; 233 } 234 235 /* find the class with minimum eligible time among the eligible classes */ 236 static inline struct hfsc_class * 237 eltree_get_minel(struct hfsc_sched *q) 238 { 239 struct rb_node *n; 240 241 n = rb_first(&q->eligible); 242 if (n == NULL) 243 return NULL; 244 return rb_entry(n, struct hfsc_class, el_node); 245 } 246 247 /* 248 * vttree holds holds backlogged child classes being sorted by their virtual 249 * time. each intermediate class has one vttree. 
250 */ 251 static void 252 vttree_insert(struct hfsc_class *cl) 253 { 254 struct rb_node **p = &cl->cl_parent->vt_tree.rb_node; 255 struct rb_node *parent = NULL; 256 struct hfsc_class *cl1; 257 258 while (*p != NULL) { 259 parent = *p; 260 cl1 = rb_entry(parent, struct hfsc_class, vt_node); 261 if (cl->cl_vt >= cl1->cl_vt) 262 p = &parent->rb_right; 263 else 264 p = &parent->rb_left; 265 } 266 rb_link_node(&cl->vt_node, parent, p); 267 rb_insert_color(&cl->vt_node, &cl->cl_parent->vt_tree); 268 } 269 270 static inline void 271 vttree_remove(struct hfsc_class *cl) 272 { 273 rb_erase(&cl->vt_node, &cl->cl_parent->vt_tree); 274 } 275 276 static inline void 277 vttree_update(struct hfsc_class *cl) 278 { 279 vttree_remove(cl); 280 vttree_insert(cl); 281 } 282 283 static inline struct hfsc_class * 284 vttree_firstfit(struct hfsc_class *cl, u64 cur_time) 285 { 286 struct hfsc_class *p; 287 struct rb_node *n; 288 289 for (n = rb_first(&cl->vt_tree); n != NULL; n = rb_next(n)) { 290 p = rb_entry(n, struct hfsc_class, vt_node); 291 if (p->cl_f <= cur_time) 292 return p; 293 } 294 return NULL; 295 } 296 297 /* 298 * get the leaf class with the minimum vt in the hierarchy 299 */ 300 static struct hfsc_class * 301 vttree_get_minvt(struct hfsc_class *cl, u64 cur_time) 302 { 303 /* if root-class's cfmin is bigger than cur_time nothing to do */ 304 if (cl->cl_cfmin > cur_time) 305 return NULL; 306 307 while (cl->level > 0) { 308 cl = vttree_firstfit(cl, cur_time); 309 if (cl == NULL) 310 return NULL; 311 /* 312 * update parent's cl_cvtmin. 
313 */ 314 if (cl->cl_parent->cl_cvtmin < cl->cl_vt) 315 cl->cl_parent->cl_cvtmin = cl->cl_vt; 316 } 317 return cl; 318 } 319 320 static void 321 cftree_insert(struct hfsc_class *cl) 322 { 323 struct rb_node **p = &cl->cl_parent->cf_tree.rb_node; 324 struct rb_node *parent = NULL; 325 struct hfsc_class *cl1; 326 327 while (*p != NULL) { 328 parent = *p; 329 cl1 = rb_entry(parent, struct hfsc_class, cf_node); 330 if (cl->cl_f >= cl1->cl_f) 331 p = &parent->rb_right; 332 else 333 p = &parent->rb_left; 334 } 335 rb_link_node(&cl->cf_node, parent, p); 336 rb_insert_color(&cl->cf_node, &cl->cl_parent->cf_tree); 337 } 338 339 static inline void 340 cftree_remove(struct hfsc_class *cl) 341 { 342 rb_erase(&cl->cf_node, &cl->cl_parent->cf_tree); 343 } 344 345 static inline void 346 cftree_update(struct hfsc_class *cl) 347 { 348 cftree_remove(cl); 349 cftree_insert(cl); 350 } 351 352 /* 353 * service curve support functions 354 * 355 * external service curve parameters 356 * m: bps 357 * d: us 358 * internal service curve parameters 359 * sm: (bytes/psched_us) << SM_SHIFT 360 * ism: (psched_us/byte) << ISM_SHIFT 361 * dx: psched_us 362 * 363 * The clock source resolution with ktime and PSCHED_SHIFT 10 is 1.024us. 364 * 365 * sm and ism are scaled in order to keep effective digits. 366 * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective 367 * digits in decimal using the following table. 368 * 369 * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps 370 * ------------+------------------------------------------------------- 371 * bytes/1.024us 12.8e-3 128e-3 1280e-3 12800e-3 128000e-3 372 * 373 * 1.024us/byte 78.125 7.8125 0.78125 0.078125 0.0078125 374 * 375 * So, for PSCHED_SHIFT 10 we need: SM_SHIFT 20, ISM_SHIFT 18. 
376 */ 377 #define SM_SHIFT (30 - PSCHED_SHIFT) 378 #define ISM_SHIFT (8 + PSCHED_SHIFT) 379 380 #define SM_MASK ((1ULL << SM_SHIFT) - 1) 381 #define ISM_MASK ((1ULL << ISM_SHIFT) - 1) 382 383 static inline u64 384 seg_x2y(u64 x, u64 sm) 385 { 386 u64 y; 387 388 /* 389 * compute 390 * y = x * sm >> SM_SHIFT 391 * but divide it for the upper and lower bits to avoid overflow 392 */ 393 y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT); 394 return y; 395 } 396 397 static inline u64 398 seg_y2x(u64 y, u64 ism) 399 { 400 u64 x; 401 402 if (y == 0) 403 x = 0; 404 else if (ism == HT_INFINITY) 405 x = HT_INFINITY; 406 else { 407 x = (y >> ISM_SHIFT) * ism 408 + (((y & ISM_MASK) * ism) >> ISM_SHIFT); 409 } 410 return x; 411 } 412 413 /* Convert m (bps) into sm (bytes/psched us) */ 414 static u64 415 m2sm(u32 m) 416 { 417 u64 sm; 418 419 sm = ((u64)m << SM_SHIFT); 420 sm += PSCHED_TICKS_PER_SEC - 1; 421 do_div(sm, PSCHED_TICKS_PER_SEC); 422 return sm; 423 } 424 425 /* convert m (bps) into ism (psched us/byte) */ 426 static u64 427 m2ism(u32 m) 428 { 429 u64 ism; 430 431 if (m == 0) 432 ism = HT_INFINITY; 433 else { 434 ism = ((u64)PSCHED_TICKS_PER_SEC << ISM_SHIFT); 435 ism += m - 1; 436 do_div(ism, m); 437 } 438 return ism; 439 } 440 441 /* convert d (us) into dx (psched us) */ 442 static u64 443 d2dx(u32 d) 444 { 445 u64 dx; 446 447 dx = ((u64)d * PSCHED_TICKS_PER_SEC); 448 dx += USEC_PER_SEC - 1; 449 do_div(dx, USEC_PER_SEC); 450 return dx; 451 } 452 453 /* convert sm (bytes/psched us) into m (bps) */ 454 static u32 455 sm2m(u64 sm) 456 { 457 u64 m; 458 459 m = (sm * PSCHED_TICKS_PER_SEC) >> SM_SHIFT; 460 return (u32)m; 461 } 462 463 /* convert dx (psched us) into d (us) */ 464 static u32 465 dx2d(u64 dx) 466 { 467 u64 d; 468 469 d = dx * USEC_PER_SEC; 470 do_div(d, PSCHED_TICKS_PER_SEC); 471 return (u32)d; 472 } 473 474 static void 475 sc2isc(struct tc_service_curve *sc, struct internal_sc *isc) 476 { 477 isc->sm1 = m2sm(sc->m1); 478 isc->ism1 = 
m2ism(sc->m1); 479 isc->dx = d2dx(sc->d); 480 isc->dy = seg_x2y(isc->dx, isc->sm1); 481 isc->sm2 = m2sm(sc->m2); 482 isc->ism2 = m2ism(sc->m2); 483 } 484 485 /* 486 * initialize the runtime service curve with the given internal 487 * service curve starting at (x, y). 488 */ 489 static void 490 rtsc_init(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) 491 { 492 rtsc->x = x; 493 rtsc->y = y; 494 rtsc->sm1 = isc->sm1; 495 rtsc->ism1 = isc->ism1; 496 rtsc->dx = isc->dx; 497 rtsc->dy = isc->dy; 498 rtsc->sm2 = isc->sm2; 499 rtsc->ism2 = isc->ism2; 500 } 501 502 /* 503 * calculate the y-projection of the runtime service curve by the 504 * given x-projection value 505 */ 506 static u64 507 rtsc_y2x(struct runtime_sc *rtsc, u64 y) 508 { 509 u64 x; 510 511 if (y < rtsc->y) 512 x = rtsc->x; 513 else if (y <= rtsc->y + rtsc->dy) { 514 /* x belongs to the 1st segment */ 515 if (rtsc->dy == 0) 516 x = rtsc->x + rtsc->dx; 517 else 518 x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1); 519 } else { 520 /* x belongs to the 2nd segment */ 521 x = rtsc->x + rtsc->dx 522 + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2); 523 } 524 return x; 525 } 526 527 static u64 528 rtsc_x2y(struct runtime_sc *rtsc, u64 x) 529 { 530 u64 y; 531 532 if (x <= rtsc->x) 533 y = rtsc->y; 534 else if (x <= rtsc->x + rtsc->dx) 535 /* y belongs to the 1st segment */ 536 y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1); 537 else 538 /* y belongs to the 2nd segment */ 539 y = rtsc->y + rtsc->dy 540 + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2); 541 return y; 542 } 543 544 /* 545 * update the runtime service curve by taking the minimum of the current 546 * runtime service curve and the service curve starting at (x, y). 
547 */ 548 static void 549 rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) 550 { 551 u64 y1, y2, dx, dy; 552 u32 dsm; 553 554 if (isc->sm1 <= isc->sm2) { 555 /* service curve is convex */ 556 y1 = rtsc_x2y(rtsc, x); 557 if (y1 < y) 558 /* the current rtsc is smaller */ 559 return; 560 rtsc->x = x; 561 rtsc->y = y; 562 return; 563 } 564 565 /* 566 * service curve is concave 567 * compute the two y values of the current rtsc 568 * y1: at x 569 * y2: at (x + dx) 570 */ 571 y1 = rtsc_x2y(rtsc, x); 572 if (y1 <= y) { 573 /* rtsc is below isc, no change to rtsc */ 574 return; 575 } 576 577 y2 = rtsc_x2y(rtsc, x + isc->dx); 578 if (y2 >= y + isc->dy) { 579 /* rtsc is above isc, replace rtsc by isc */ 580 rtsc->x = x; 581 rtsc->y = y; 582 rtsc->dx = isc->dx; 583 rtsc->dy = isc->dy; 584 return; 585 } 586 587 /* 588 * the two curves intersect 589 * compute the offsets (dx, dy) using the reverse 590 * function of seg_x2y() 591 * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y) 592 */ 593 dx = (y1 - y) << SM_SHIFT; 594 dsm = isc->sm1 - isc->sm2; 595 do_div(dx, dsm); 596 /* 597 * check if (x, y1) belongs to the 1st segment of rtsc. 598 * if so, add the offset. 599 */ 600 if (rtsc->x + rtsc->dx > x) 601 dx += rtsc->x + rtsc->dx - x; 602 dy = seg_x2y(dx, isc->sm1); 603 604 rtsc->x = x; 605 rtsc->y = y; 606 rtsc->dx = dx; 607 rtsc->dy = dy; 608 } 609 610 static void 611 init_ed(struct hfsc_class *cl, unsigned int next_len) 612 { 613 u64 cur_time = psched_get_time(); 614 615 /* update the deadline curve */ 616 rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul); 617 618 /* 619 * update the eligible curve. 620 * for concave, it is equal to the deadline curve. 621 * for convex, it is a linear curve with slope m2. 
622 */ 623 cl->cl_eligible = cl->cl_deadline; 624 if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) { 625 cl->cl_eligible.dx = 0; 626 cl->cl_eligible.dy = 0; 627 } 628 629 /* compute e and d */ 630 cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); 631 cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); 632 633 eltree_insert(cl); 634 } 635 636 static void 637 update_ed(struct hfsc_class *cl, unsigned int next_len) 638 { 639 cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); 640 cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); 641 642 eltree_update(cl); 643 } 644 645 static inline void 646 update_d(struct hfsc_class *cl, unsigned int next_len) 647 { 648 cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); 649 } 650 651 static inline void 652 update_cfmin(struct hfsc_class *cl) 653 { 654 struct rb_node *n = rb_first(&cl->cf_tree); 655 struct hfsc_class *p; 656 657 if (n == NULL) { 658 cl->cl_cfmin = 0; 659 return; 660 } 661 p = rb_entry(n, struct hfsc_class, cf_node); 662 cl->cl_cfmin = p->cl_f; 663 } 664 665 static void 666 init_vf(struct hfsc_class *cl, unsigned int len) 667 { 668 struct hfsc_class *max_cl; 669 struct rb_node *n; 670 u64 vt, f, cur_time; 671 int go_active; 672 673 cur_time = 0; 674 go_active = 1; 675 for (; cl->cl_parent != NULL; cl = cl->cl_parent) { 676 if (go_active && cl->cl_nactive++ == 0) 677 go_active = 1; 678 else 679 go_active = 0; 680 681 if (go_active) { 682 n = rb_last(&cl->cl_parent->vt_tree); 683 if (n != NULL) { 684 max_cl = rb_entry(n, struct hfsc_class, vt_node); 685 /* 686 * set vt to the average of the min and max 687 * classes. if the parent's period didn't 688 * change, don't decrease vt of the class. 689 */ 690 vt = max_cl->cl_vt; 691 if (cl->cl_parent->cl_cvtmin != 0) 692 vt = (cl->cl_parent->cl_cvtmin + vt)/2; 693 694 if (cl->cl_parent->cl_vtperiod != 695 cl->cl_parentperiod || vt > cl->cl_vt) 696 cl->cl_vt = vt; 697 } else { 698 /* 699 * first child for a new parent backlog period. 
700 * initialize cl_vt to the highest value seen 701 * among the siblings. this is analogous to 702 * what cur_time would provide in realtime case. 703 */ 704 cl->cl_vt = cl->cl_parent->cl_cvtoff; 705 cl->cl_parent->cl_cvtmin = 0; 706 } 707 708 /* update the virtual curve */ 709 rtsc_min(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); 710 cl->cl_vtadj = 0; 711 712 cl->cl_vtperiod++; /* increment vt period */ 713 cl->cl_parentperiod = cl->cl_parent->cl_vtperiod; 714 if (cl->cl_parent->cl_nactive == 0) 715 cl->cl_parentperiod++; 716 cl->cl_f = 0; 717 718 vttree_insert(cl); 719 cftree_insert(cl); 720 721 if (cl->cl_flags & HFSC_USC) { 722 /* class has upper limit curve */ 723 if (cur_time == 0) 724 cur_time = psched_get_time(); 725 726 /* update the ulimit curve */ 727 rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time, 728 cl->cl_total); 729 /* compute myf */ 730 cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, 731 cl->cl_total); 732 } 733 } 734 735 f = max(cl->cl_myf, cl->cl_cfmin); 736 if (f != cl->cl_f) { 737 cl->cl_f = f; 738 cftree_update(cl); 739 } 740 update_cfmin(cl->cl_parent); 741 } 742 } 743 744 static void 745 update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) 746 { 747 u64 f; /* , myf_bound, delta; */ 748 int go_passive = 0; 749 750 if (cl->qdisc->q.qlen == 0 && cl->cl_flags & HFSC_FSC) 751 go_passive = 1; 752 753 for (; cl->cl_parent != NULL; cl = cl->cl_parent) { 754 cl->cl_total += len; 755 756 if (!(cl->cl_flags & HFSC_FSC) || cl->cl_nactive == 0) 757 continue; 758 759 if (go_passive && --cl->cl_nactive == 0) 760 go_passive = 1; 761 else 762 go_passive = 0; 763 764 /* update vt */ 765 cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) + cl->cl_vtadj; 766 767 /* 768 * if vt of the class is smaller than cvtmin, 769 * the class was skipped in the past due to non-fit. 770 * if so, we need to adjust vtadj. 
771 */ 772 if (cl->cl_vt < cl->cl_parent->cl_cvtmin) { 773 cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt; 774 cl->cl_vt = cl->cl_parent->cl_cvtmin; 775 } 776 777 if (go_passive) { 778 /* no more active child, going passive */ 779 780 /* update cvtoff of the parent class */ 781 if (cl->cl_vt > cl->cl_parent->cl_cvtoff) 782 cl->cl_parent->cl_cvtoff = cl->cl_vt; 783 784 /* remove this class from the vt tree */ 785 vttree_remove(cl); 786 787 cftree_remove(cl); 788 update_cfmin(cl->cl_parent); 789 790 continue; 791 } 792 793 /* update the vt tree */ 794 vttree_update(cl); 795 796 /* update f */ 797 if (cl->cl_flags & HFSC_USC) { 798 cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); 799 #if 0 800 cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, 801 cl->cl_total); 802 /* 803 * This code causes classes to stay way under their 804 * limit when multiple classes are used at gigabit 805 * speed. needs investigation. -kaber 806 */ 807 /* 808 * if myf lags behind by more than one clock tick 809 * from the current time, adjust myfadj to prevent 810 * a rate-limited class from going greedy. 811 * in a steady state under rate-limiting, myf 812 * fluctuates within one clock tick. 
813 */ 814 myf_bound = cur_time - PSCHED_JIFFIE2US(1); 815 if (cl->cl_myf < myf_bound) { 816 delta = cur_time - cl->cl_myf; 817 cl->cl_myfadj += delta; 818 cl->cl_myf += delta; 819 } 820 #endif 821 } 822 823 f = max(cl->cl_myf, cl->cl_cfmin); 824 if (f != cl->cl_f) { 825 cl->cl_f = f; 826 cftree_update(cl); 827 update_cfmin(cl->cl_parent); 828 } 829 } 830 } 831 832 static void 833 set_active(struct hfsc_class *cl, unsigned int len) 834 { 835 if (cl->cl_flags & HFSC_RSC) 836 init_ed(cl, len); 837 if (cl->cl_flags & HFSC_FSC) 838 init_vf(cl, len); 839 840 } 841 842 static void 843 set_passive(struct hfsc_class *cl) 844 { 845 if (cl->cl_flags & HFSC_RSC) 846 eltree_remove(cl); 847 848 /* 849 * vttree is now handled in update_vf() so that update_vf(cl, 0, 0) 850 * needs to be called explicitly to remove a class from vttree. 851 */ 852 } 853 854 static unsigned int 855 qdisc_peek_len(struct Qdisc *sch) 856 { 857 struct sk_buff *skb; 858 unsigned int len; 859 860 skb = sch->ops->peek(sch); 861 if (unlikely(skb == NULL)) { 862 qdisc_warn_nonwc("qdisc_peek_len", sch); 863 return 0; 864 } 865 len = qdisc_pkt_len(skb); 866 867 return len; 868 } 869 870 static void 871 hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) 872 { 873 unsigned int len = cl->qdisc->q.qlen; 874 unsigned int backlog = cl->qdisc->qstats.backlog; 875 876 qdisc_reset(cl->qdisc); 877 qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); 878 } 879 880 static void 881 hfsc_adjust_levels(struct hfsc_class *cl) 882 { 883 struct hfsc_class *p; 884 unsigned int level; 885 886 do { 887 level = 0; 888 list_for_each_entry(p, &cl->children, siblings) { 889 if (p->level >= level) 890 level = p->level + 1; 891 } 892 cl->level = level; 893 } while ((cl = cl->cl_parent) != NULL); 894 } 895 896 static inline struct hfsc_class * 897 hfsc_find_class(u32 classid, struct Qdisc *sch) 898 { 899 struct hfsc_sched *q = qdisc_priv(sch); 900 struct Qdisc_class_common *clc; 901 902 clc = qdisc_class_find(&q->clhash, 
classid); 903 if (clc == NULL) 904 return NULL; 905 return container_of(clc, struct hfsc_class, cl_common); 906 } 907 908 static void 909 hfsc_change_rsc(struct hfsc_class *cl, struct tc_service_curve *rsc, 910 u64 cur_time) 911 { 912 sc2isc(rsc, &cl->cl_rsc); 913 rtsc_init(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul); 914 cl->cl_eligible = cl->cl_deadline; 915 if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) { 916 cl->cl_eligible.dx = 0; 917 cl->cl_eligible.dy = 0; 918 } 919 cl->cl_flags |= HFSC_RSC; 920 } 921 922 static void 923 hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc) 924 { 925 sc2isc(fsc, &cl->cl_fsc); 926 rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); 927 cl->cl_flags |= HFSC_FSC; 928 } 929 930 static void 931 hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc, 932 u64 cur_time) 933 { 934 sc2isc(usc, &cl->cl_usc); 935 rtsc_init(&cl->cl_ulimit, &cl->cl_usc, cur_time, cl->cl_total); 936 cl->cl_flags |= HFSC_USC; 937 } 938 939 static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = { 940 [TCA_HFSC_RSC] = { .len = sizeof(struct tc_service_curve) }, 941 [TCA_HFSC_FSC] = { .len = sizeof(struct tc_service_curve) }, 942 [TCA_HFSC_USC] = { .len = sizeof(struct tc_service_curve) }, 943 }; 944 945 static int 946 hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 947 struct nlattr **tca, unsigned long *arg) 948 { 949 struct hfsc_sched *q = qdisc_priv(sch); 950 struct hfsc_class *cl = (struct hfsc_class *)*arg; 951 struct hfsc_class *parent = NULL; 952 struct nlattr *opt = tca[TCA_OPTIONS]; 953 struct nlattr *tb[TCA_HFSC_MAX + 1]; 954 struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL; 955 u64 cur_time; 956 int err; 957 958 if (opt == NULL) 959 return -EINVAL; 960 961 err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy, NULL); 962 if (err < 0) 963 return err; 964 965 if (tb[TCA_HFSC_RSC]) { 966 rsc = nla_data(tb[TCA_HFSC_RSC]); 967 if (rsc->m1 == 0 && rsc->m2 == 0) 968 rsc = 
NULL; 969 } 970 971 if (tb[TCA_HFSC_FSC]) { 972 fsc = nla_data(tb[TCA_HFSC_FSC]); 973 if (fsc->m1 == 0 && fsc->m2 == 0) 974 fsc = NULL; 975 } 976 977 if (tb[TCA_HFSC_USC]) { 978 usc = nla_data(tb[TCA_HFSC_USC]); 979 if (usc->m1 == 0 && usc->m2 == 0) 980 usc = NULL; 981 } 982 983 if (cl != NULL) { 984 if (parentid) { 985 if (cl->cl_parent && 986 cl->cl_parent->cl_common.classid != parentid) 987 return -EINVAL; 988 if (cl->cl_parent == NULL && parentid != TC_H_ROOT) 989 return -EINVAL; 990 } 991 cur_time = psched_get_time(); 992 993 if (tca[TCA_RATE]) { 994 err = gen_replace_estimator(&cl->bstats, NULL, 995 &cl->rate_est, 996 NULL, 997 qdisc_root_sleeping_running(sch), 998 tca[TCA_RATE]); 999 if (err) 1000 return err; 1001 } 1002 1003 sch_tree_lock(sch); 1004 if (rsc != NULL) 1005 hfsc_change_rsc(cl, rsc, cur_time); 1006 if (fsc != NULL) 1007 hfsc_change_fsc(cl, fsc); 1008 if (usc != NULL) 1009 hfsc_change_usc(cl, usc, cur_time); 1010 1011 if (cl->qdisc->q.qlen != 0) { 1012 if (cl->cl_flags & HFSC_RSC) 1013 update_ed(cl, qdisc_peek_len(cl->qdisc)); 1014 if (cl->cl_flags & HFSC_FSC) 1015 update_vf(cl, 0, cur_time); 1016 } 1017 sch_tree_unlock(sch); 1018 1019 return 0; 1020 } 1021 1022 if (parentid == TC_H_ROOT) 1023 return -EEXIST; 1024 1025 parent = &q->root; 1026 if (parentid) { 1027 parent = hfsc_find_class(parentid, sch); 1028 if (parent == NULL) 1029 return -ENOENT; 1030 } 1031 1032 if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0) 1033 return -EINVAL; 1034 if (hfsc_find_class(classid, sch)) 1035 return -EEXIST; 1036 1037 if (rsc == NULL && fsc == NULL) 1038 return -EINVAL; 1039 1040 cl = kzalloc(sizeof(struct hfsc_class), GFP_KERNEL); 1041 if (cl == NULL) 1042 return -ENOBUFS; 1043 1044 err = tcf_block_get(&cl->block, &cl->filter_list); 1045 if (err) { 1046 kfree(cl); 1047 return err; 1048 } 1049 1050 if (tca[TCA_RATE]) { 1051 err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, 1052 NULL, 1053 qdisc_root_sleeping_running(sch), 1054 tca[TCA_RATE]); 
1055 if (err) { 1056 tcf_block_put(cl->block); 1057 kfree(cl); 1058 return err; 1059 } 1060 } 1061 1062 if (rsc != NULL) 1063 hfsc_change_rsc(cl, rsc, 0); 1064 if (fsc != NULL) 1065 hfsc_change_fsc(cl, fsc); 1066 if (usc != NULL) 1067 hfsc_change_usc(cl, usc, 0); 1068 1069 cl->cl_common.classid = classid; 1070 cl->refcnt = 1; 1071 cl->sched = q; 1072 cl->cl_parent = parent; 1073 cl->qdisc = qdisc_create_dflt(sch->dev_queue, 1074 &pfifo_qdisc_ops, classid); 1075 if (cl->qdisc == NULL) 1076 cl->qdisc = &noop_qdisc; 1077 else 1078 qdisc_hash_add(cl->qdisc, true); 1079 INIT_LIST_HEAD(&cl->children); 1080 cl->vt_tree = RB_ROOT; 1081 cl->cf_tree = RB_ROOT; 1082 1083 sch_tree_lock(sch); 1084 qdisc_class_hash_insert(&q->clhash, &cl->cl_common); 1085 list_add_tail(&cl->siblings, &parent->children); 1086 if (parent->level == 0) 1087 hfsc_purge_queue(sch, parent); 1088 hfsc_adjust_levels(parent); 1089 sch_tree_unlock(sch); 1090 1091 qdisc_class_hash_grow(sch, &q->clhash); 1092 1093 *arg = (unsigned long)cl; 1094 return 0; 1095 } 1096 1097 static void 1098 hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) 1099 { 1100 struct hfsc_sched *q = qdisc_priv(sch); 1101 1102 tcf_block_put(cl->block); 1103 qdisc_destroy(cl->qdisc); 1104 gen_kill_estimator(&cl->rate_est); 1105 if (cl != &q->root) 1106 kfree(cl); 1107 } 1108 1109 static int 1110 hfsc_delete_class(struct Qdisc *sch, unsigned long arg) 1111 { 1112 struct hfsc_sched *q = qdisc_priv(sch); 1113 struct hfsc_class *cl = (struct hfsc_class *)arg; 1114 1115 if (cl->level > 0 || cl->filter_cnt > 0 || cl == &q->root) 1116 return -EBUSY; 1117 1118 sch_tree_lock(sch); 1119 1120 list_del(&cl->siblings); 1121 hfsc_adjust_levels(cl->cl_parent); 1122 1123 hfsc_purge_queue(sch, cl); 1124 qdisc_class_hash_remove(&q->clhash, &cl->cl_common); 1125 1126 BUG_ON(--cl->refcnt == 0); 1127 /* 1128 * This shouldn't happen: we "hold" one cops->get() when called 1129 * from tc_ctl_tclass; the destroy method is done from cops->put(). 
1130 */ 1131 1132 sch_tree_unlock(sch); 1133 return 0; 1134 } 1135 1136 static struct hfsc_class * 1137 hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) 1138 { 1139 struct hfsc_sched *q = qdisc_priv(sch); 1140 struct hfsc_class *head, *cl; 1141 struct tcf_result res; 1142 struct tcf_proto *tcf; 1143 int result; 1144 1145 if (TC_H_MAJ(skb->priority ^ sch->handle) == 0 && 1146 (cl = hfsc_find_class(skb->priority, sch)) != NULL) 1147 if (cl->level == 0) 1148 return cl; 1149 1150 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 1151 head = &q->root; 1152 tcf = rcu_dereference_bh(q->root.filter_list); 1153 while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) { 1154 #ifdef CONFIG_NET_CLS_ACT 1155 switch (result) { 1156 case TC_ACT_QUEUED: 1157 case TC_ACT_STOLEN: 1158 case TC_ACT_TRAP: 1159 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 1160 case TC_ACT_SHOT: 1161 return NULL; 1162 } 1163 #endif 1164 cl = (struct hfsc_class *)res.class; 1165 if (!cl) { 1166 cl = hfsc_find_class(res.classid, sch); 1167 if (!cl) 1168 break; /* filter selected invalid classid */ 1169 if (cl->level >= head->level) 1170 break; /* filter may only point downwards */ 1171 } 1172 1173 if (cl->level == 0) 1174 return cl; /* hit leaf class */ 1175 1176 /* apply inner filter chain */ 1177 tcf = rcu_dereference_bh(cl->filter_list); 1178 head = cl; 1179 } 1180 1181 /* classification failed, try default class */ 1182 cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch); 1183 if (cl == NULL || cl->level > 0) 1184 return NULL; 1185 1186 return cl; 1187 } 1188 1189 static int 1190 hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 1191 struct Qdisc **old) 1192 { 1193 struct hfsc_class *cl = (struct hfsc_class *)arg; 1194 1195 if (cl->level > 0) 1196 return -EINVAL; 1197 if (new == NULL) { 1198 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, 1199 cl->cl_common.classid); 1200 if (new == NULL) 1201 new = &noop_qdisc; 1202 } 1203 
1204 *old = qdisc_replace(sch, new, &cl->qdisc); 1205 return 0; 1206 } 1207 1208 static struct Qdisc * 1209 hfsc_class_leaf(struct Qdisc *sch, unsigned long arg) 1210 { 1211 struct hfsc_class *cl = (struct hfsc_class *)arg; 1212 1213 if (cl->level == 0) 1214 return cl->qdisc; 1215 1216 return NULL; 1217 } 1218 1219 static void 1220 hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg) 1221 { 1222 struct hfsc_class *cl = (struct hfsc_class *)arg; 1223 1224 if (cl->qdisc->q.qlen == 0) { 1225 update_vf(cl, 0, 0); 1226 set_passive(cl); 1227 } 1228 } 1229 1230 static unsigned long 1231 hfsc_get_class(struct Qdisc *sch, u32 classid) 1232 { 1233 struct hfsc_class *cl = hfsc_find_class(classid, sch); 1234 1235 if (cl != NULL) 1236 cl->refcnt++; 1237 1238 return (unsigned long)cl; 1239 } 1240 1241 static void 1242 hfsc_put_class(struct Qdisc *sch, unsigned long arg) 1243 { 1244 struct hfsc_class *cl = (struct hfsc_class *)arg; 1245 1246 if (--cl->refcnt == 0) 1247 hfsc_destroy_class(sch, cl); 1248 } 1249 1250 static unsigned long 1251 hfsc_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid) 1252 { 1253 struct hfsc_class *p = (struct hfsc_class *)parent; 1254 struct hfsc_class *cl = hfsc_find_class(classid, sch); 1255 1256 if (cl != NULL) { 1257 if (p != NULL && p->level <= cl->level) 1258 return 0; 1259 cl->filter_cnt++; 1260 } 1261 1262 return (unsigned long)cl; 1263 } 1264 1265 static void 1266 hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg) 1267 { 1268 struct hfsc_class *cl = (struct hfsc_class *)arg; 1269 1270 cl->filter_cnt--; 1271 } 1272 1273 static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg) 1274 { 1275 struct hfsc_sched *q = qdisc_priv(sch); 1276 struct hfsc_class *cl = (struct hfsc_class *)arg; 1277 1278 if (cl == NULL) 1279 cl = &q->root; 1280 1281 return cl->block; 1282 } 1283 1284 static int 1285 hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc) 1286 { 1287 struct tc_service_curve tsc; 1288 1289 
tsc.m1 = sm2m(sc->sm1); 1290 tsc.d = dx2d(sc->dx); 1291 tsc.m2 = sm2m(sc->sm2); 1292 if (nla_put(skb, attr, sizeof(tsc), &tsc)) 1293 goto nla_put_failure; 1294 1295 return skb->len; 1296 1297 nla_put_failure: 1298 return -1; 1299 } 1300 1301 static int 1302 hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl) 1303 { 1304 if ((cl->cl_flags & HFSC_RSC) && 1305 (hfsc_dump_sc(skb, TCA_HFSC_RSC, &cl->cl_rsc) < 0)) 1306 goto nla_put_failure; 1307 1308 if ((cl->cl_flags & HFSC_FSC) && 1309 (hfsc_dump_sc(skb, TCA_HFSC_FSC, &cl->cl_fsc) < 0)) 1310 goto nla_put_failure; 1311 1312 if ((cl->cl_flags & HFSC_USC) && 1313 (hfsc_dump_sc(skb, TCA_HFSC_USC, &cl->cl_usc) < 0)) 1314 goto nla_put_failure; 1315 1316 return skb->len; 1317 1318 nla_put_failure: 1319 return -1; 1320 } 1321 1322 static int 1323 hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, 1324 struct tcmsg *tcm) 1325 { 1326 struct hfsc_class *cl = (struct hfsc_class *)arg; 1327 struct nlattr *nest; 1328 1329 tcm->tcm_parent = cl->cl_parent ? 
cl->cl_parent->cl_common.classid : 1330 TC_H_ROOT; 1331 tcm->tcm_handle = cl->cl_common.classid; 1332 if (cl->level == 0) 1333 tcm->tcm_info = cl->qdisc->handle; 1334 1335 nest = nla_nest_start(skb, TCA_OPTIONS); 1336 if (nest == NULL) 1337 goto nla_put_failure; 1338 if (hfsc_dump_curves(skb, cl) < 0) 1339 goto nla_put_failure; 1340 return nla_nest_end(skb, nest); 1341 1342 nla_put_failure: 1343 nla_nest_cancel(skb, nest); 1344 return -EMSGSIZE; 1345 } 1346 1347 static int 1348 hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, 1349 struct gnet_dump *d) 1350 { 1351 struct hfsc_class *cl = (struct hfsc_class *)arg; 1352 struct tc_hfsc_stats xstats; 1353 1354 cl->qstats.backlog = cl->qdisc->qstats.backlog; 1355 xstats.level = cl->level; 1356 xstats.period = cl->cl_vtperiod; 1357 xstats.work = cl->cl_total; 1358 xstats.rtwork = cl->cl_cumul; 1359 1360 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || 1361 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || 1362 gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0) 1363 return -1; 1364 1365 return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); 1366 } 1367 1368 1369 1370 static void 1371 hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) 1372 { 1373 struct hfsc_sched *q = qdisc_priv(sch); 1374 struct hfsc_class *cl; 1375 unsigned int i; 1376 1377 if (arg->stop) 1378 return; 1379 1380 for (i = 0; i < q->clhash.hashsize; i++) { 1381 hlist_for_each_entry(cl, &q->clhash.hash[i], 1382 cl_common.hnode) { 1383 if (arg->count < arg->skip) { 1384 arg->count++; 1385 continue; 1386 } 1387 if (arg->fn(sch, (unsigned long)cl, arg) < 0) { 1388 arg->stop = 1; 1389 return; 1390 } 1391 arg->count++; 1392 } 1393 } 1394 } 1395 1396 static void 1397 hfsc_schedule_watchdog(struct Qdisc *sch) 1398 { 1399 struct hfsc_sched *q = qdisc_priv(sch); 1400 struct hfsc_class *cl; 1401 u64 next_time = 0; 1402 1403 cl = eltree_get_minel(q); 1404 if (cl) 1405 next_time = cl->cl_e; 1406 if 
(q->root.cl_cfmin != 0) { 1407 if (next_time == 0 || next_time > q->root.cl_cfmin) 1408 next_time = q->root.cl_cfmin; 1409 } 1410 WARN_ON(next_time == 0); 1411 qdisc_watchdog_schedule(&q->watchdog, next_time); 1412 } 1413 1414 static int 1415 hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) 1416 { 1417 struct hfsc_sched *q = qdisc_priv(sch); 1418 struct tc_hfsc_qopt *qopt; 1419 int err; 1420 1421 if (opt == NULL || nla_len(opt) < sizeof(*qopt)) 1422 return -EINVAL; 1423 qopt = nla_data(opt); 1424 1425 q->defcls = qopt->defcls; 1426 err = qdisc_class_hash_init(&q->clhash); 1427 if (err < 0) 1428 return err; 1429 q->eligible = RB_ROOT; 1430 1431 err = tcf_block_get(&q->root.block, &q->root.filter_list); 1432 if (err) 1433 goto err_tcf; 1434 1435 q->root.cl_common.classid = sch->handle; 1436 q->root.refcnt = 1; 1437 q->root.sched = q; 1438 q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, 1439 sch->handle); 1440 if (q->root.qdisc == NULL) 1441 q->root.qdisc = &noop_qdisc; 1442 else 1443 qdisc_hash_add(q->root.qdisc, true); 1444 INIT_LIST_HEAD(&q->root.children); 1445 q->root.vt_tree = RB_ROOT; 1446 q->root.cf_tree = RB_ROOT; 1447 1448 qdisc_class_hash_insert(&q->clhash, &q->root.cl_common); 1449 qdisc_class_hash_grow(sch, &q->clhash); 1450 1451 qdisc_watchdog_init(&q->watchdog, sch); 1452 1453 return 0; 1454 1455 err_tcf: 1456 qdisc_class_hash_destroy(&q->clhash); 1457 return err; 1458 } 1459 1460 static int 1461 hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt) 1462 { 1463 struct hfsc_sched *q = qdisc_priv(sch); 1464 struct tc_hfsc_qopt *qopt; 1465 1466 if (opt == NULL || nla_len(opt) < sizeof(*qopt)) 1467 return -EINVAL; 1468 qopt = nla_data(opt); 1469 1470 sch_tree_lock(sch); 1471 q->defcls = qopt->defcls; 1472 sch_tree_unlock(sch); 1473 1474 return 0; 1475 } 1476 1477 static void 1478 hfsc_reset_class(struct hfsc_class *cl) 1479 { 1480 cl->cl_total = 0; 1481 cl->cl_cumul = 0; 1482 cl->cl_d = 0; 1483 cl->cl_e = 0; 1484 cl->cl_vt = 0; 
1485 cl->cl_vtadj = 0; 1486 cl->cl_cvtmin = 0; 1487 cl->cl_cvtoff = 0; 1488 cl->cl_vtperiod = 0; 1489 cl->cl_parentperiod = 0; 1490 cl->cl_f = 0; 1491 cl->cl_myf = 0; 1492 cl->cl_cfmin = 0; 1493 cl->cl_nactive = 0; 1494 1495 cl->vt_tree = RB_ROOT; 1496 cl->cf_tree = RB_ROOT; 1497 qdisc_reset(cl->qdisc); 1498 1499 if (cl->cl_flags & HFSC_RSC) 1500 rtsc_init(&cl->cl_deadline, &cl->cl_rsc, 0, 0); 1501 if (cl->cl_flags & HFSC_FSC) 1502 rtsc_init(&cl->cl_virtual, &cl->cl_fsc, 0, 0); 1503 if (cl->cl_flags & HFSC_USC) 1504 rtsc_init(&cl->cl_ulimit, &cl->cl_usc, 0, 0); 1505 } 1506 1507 static void 1508 hfsc_reset_qdisc(struct Qdisc *sch) 1509 { 1510 struct hfsc_sched *q = qdisc_priv(sch); 1511 struct hfsc_class *cl; 1512 unsigned int i; 1513 1514 for (i = 0; i < q->clhash.hashsize; i++) { 1515 hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) 1516 hfsc_reset_class(cl); 1517 } 1518 q->eligible = RB_ROOT; 1519 qdisc_watchdog_cancel(&q->watchdog); 1520 sch->qstats.backlog = 0; 1521 sch->q.qlen = 0; 1522 } 1523 1524 static void 1525 hfsc_destroy_qdisc(struct Qdisc *sch) 1526 { 1527 struct hfsc_sched *q = qdisc_priv(sch); 1528 struct hlist_node *next; 1529 struct hfsc_class *cl; 1530 unsigned int i; 1531 1532 for (i = 0; i < q->clhash.hashsize; i++) { 1533 hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) { 1534 tcf_block_put(cl->block); 1535 cl->block = NULL; 1536 } 1537 } 1538 for (i = 0; i < q->clhash.hashsize; i++) { 1539 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], 1540 cl_common.hnode) 1541 hfsc_destroy_class(sch, cl); 1542 } 1543 qdisc_class_hash_destroy(&q->clhash); 1544 qdisc_watchdog_cancel(&q->watchdog); 1545 } 1546 1547 static int 1548 hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) 1549 { 1550 struct hfsc_sched *q = qdisc_priv(sch); 1551 unsigned char *b = skb_tail_pointer(skb); 1552 struct tc_hfsc_qopt qopt; 1553 1554 qopt.defcls = q->defcls; 1555 if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) 1556 goto 
nla_put_failure; 1557 return skb->len; 1558 1559 nla_put_failure: 1560 nlmsg_trim(skb, b); 1561 return -1; 1562 } 1563 1564 static int 1565 hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) 1566 { 1567 struct hfsc_class *cl; 1568 int uninitialized_var(err); 1569 1570 cl = hfsc_classify(skb, sch, &err); 1571 if (cl == NULL) { 1572 if (err & __NET_XMIT_BYPASS) 1573 qdisc_qstats_drop(sch); 1574 __qdisc_drop(skb, to_free); 1575 return err; 1576 } 1577 1578 err = qdisc_enqueue(skb, cl->qdisc, to_free); 1579 if (unlikely(err != NET_XMIT_SUCCESS)) { 1580 if (net_xmit_drop_count(err)) { 1581 cl->qstats.drops++; 1582 qdisc_qstats_drop(sch); 1583 } 1584 return err; 1585 } 1586 1587 if (cl->qdisc->q.qlen == 1) { 1588 set_active(cl, qdisc_pkt_len(skb)); 1589 /* 1590 * If this is the first packet, isolate the head so an eventual 1591 * head drop before the first dequeue operation has no chance 1592 * to invalidate the deadline. 1593 */ 1594 if (cl->cl_flags & HFSC_RSC) 1595 cl->qdisc->ops->peek(cl->qdisc); 1596 1597 } 1598 1599 qdisc_qstats_backlog_inc(sch, skb); 1600 sch->q.qlen++; 1601 1602 return NET_XMIT_SUCCESS; 1603 } 1604 1605 static struct sk_buff * 1606 hfsc_dequeue(struct Qdisc *sch) 1607 { 1608 struct hfsc_sched *q = qdisc_priv(sch); 1609 struct hfsc_class *cl; 1610 struct sk_buff *skb; 1611 u64 cur_time; 1612 unsigned int next_len; 1613 int realtime = 0; 1614 1615 if (sch->q.qlen == 0) 1616 return NULL; 1617 1618 cur_time = psched_get_time(); 1619 1620 /* 1621 * if there are eligible classes, use real-time criteria. 1622 * find the class with the minimum deadline among 1623 * the eligible classes. 
1624 */ 1625 cl = eltree_get_mindl(q, cur_time); 1626 if (cl) { 1627 realtime = 1; 1628 } else { 1629 /* 1630 * use link-sharing criteria 1631 * get the class with the minimum vt in the hierarchy 1632 */ 1633 cl = vttree_get_minvt(&q->root, cur_time); 1634 if (cl == NULL) { 1635 qdisc_qstats_overlimit(sch); 1636 hfsc_schedule_watchdog(sch); 1637 return NULL; 1638 } 1639 } 1640 1641 skb = qdisc_dequeue_peeked(cl->qdisc); 1642 if (skb == NULL) { 1643 qdisc_warn_nonwc("HFSC", cl->qdisc); 1644 return NULL; 1645 } 1646 1647 bstats_update(&cl->bstats, skb); 1648 update_vf(cl, qdisc_pkt_len(skb), cur_time); 1649 if (realtime) 1650 cl->cl_cumul += qdisc_pkt_len(skb); 1651 1652 if (cl->qdisc->q.qlen != 0) { 1653 if (cl->cl_flags & HFSC_RSC) { 1654 /* update ed */ 1655 next_len = qdisc_peek_len(cl->qdisc); 1656 if (realtime) 1657 update_ed(cl, next_len); 1658 else 1659 update_d(cl, next_len); 1660 } 1661 } else { 1662 /* the class becomes passive */ 1663 set_passive(cl); 1664 } 1665 1666 qdisc_bstats_update(sch, skb); 1667 qdisc_qstats_backlog_dec(sch, skb); 1668 sch->q.qlen--; 1669 1670 return skb; 1671 } 1672 1673 static const struct Qdisc_class_ops hfsc_class_ops = { 1674 .change = hfsc_change_class, 1675 .delete = hfsc_delete_class, 1676 .graft = hfsc_graft_class, 1677 .leaf = hfsc_class_leaf, 1678 .qlen_notify = hfsc_qlen_notify, 1679 .get = hfsc_get_class, 1680 .put = hfsc_put_class, 1681 .bind_tcf = hfsc_bind_tcf, 1682 .unbind_tcf = hfsc_unbind_tcf, 1683 .tcf_block = hfsc_tcf_block, 1684 .dump = hfsc_dump_class, 1685 .dump_stats = hfsc_dump_class_stats, 1686 .walk = hfsc_walk 1687 }; 1688 1689 static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = { 1690 .id = "hfsc", 1691 .init = hfsc_init_qdisc, 1692 .change = hfsc_change_qdisc, 1693 .reset = hfsc_reset_qdisc, 1694 .destroy = hfsc_destroy_qdisc, 1695 .dump = hfsc_dump_qdisc, 1696 .enqueue = hfsc_enqueue, 1697 .dequeue = hfsc_dequeue, 1698 .peek = qdisc_peek_dequeued, 1699 .cl_ops = &hfsc_class_ops, 1700 .priv_size = 
sizeof(struct hfsc_sched), 1701 .owner = THIS_MODULE 1702 }; 1703 1704 static int __init 1705 hfsc_init(void) 1706 { 1707 return register_qdisc(&hfsc_qdisc_ops); 1708 } 1709 1710 static void __exit 1711 hfsc_cleanup(void) 1712 { 1713 unregister_qdisc(&hfsc_qdisc_ops); 1714 } 1715 1716 MODULE_LICENSE("GPL"); 1717 module_init(hfsc_init); 1718 module_exit(hfsc_cleanup); 1719