/*
 * Copyright (c) 2003 Patrick McHardy, <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * 2003-10-17 - Ported from altq
 */
/*
 * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation is hereby granted (including for commercial or
 * for-profit use), provided that both the copyright notice and this
 * permission notice appear in all copies of the software, derivative
 * works, or modified versions, and any portions thereof.
 *
 * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
 * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
 * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * Carnegie Mellon encourages (but does not require) users of this
 * software to return any improvements or extensions that they make,
 * and to grant Carnegie Mellon the rights to redistribute these
 * changes without encumbrance.
 */
/*
 * H-FSC is described in Proceedings of SIGCOMM'97,
 * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
 * Real-Time and Priority Service"
 * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
 *
 * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
 * When a class has an upperlimit, the fit-time is computed from the
 * upperlimit service curve. The link-sharing scheduler does not schedule
 * a class whose fit-time exceeds the current time.
 */

#include <linux/kernel.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <asm/system.h>
#include <asm/div64.h>

#define HFSC_DEBUG 1

/*
 * kernel internal service curve representation:
 *   coordinates are given by 64 bit unsigned integers.
 *   x-axis: unit is clock count.
 *   y-axis: unit is byte.
 *
 *   The service curve parameters are converted to the internal
 *   representation. The slope values are scaled to avoid overflow.
 *   The inverse slope values as well as the y-projection of the 1st
 *   segment are kept in order to avoid 64-bit divide operations
 *   that are expensive on 32-bit architectures.
 */
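/*
 * Keeping both the slope (sm) and its inverse (ism) means seg_x2y()
 * and seg_y2x() below can convert between time and bytes in either
 * direction with multiplies and shifts only (roughly y = x * sm >> SM_SHIFT
 * and x = y * ism >> ISM_SHIFT), so the per-packet path never has to
 * fall back to do_div() for a 64-bit division.
 */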
struct internal_sc
{
	u64	sm1;	/* scaled slope of the 1st segment */
	u64	ism1;	/* scaled inverse-slope of the 1st segment */
	u64	dx;	/* the x-projection of the 1st segment */
	u64	dy;	/* the y-projection of the 1st segment */
	u64	sm2;	/* scaled slope of the 2nd segment */
	u64	ism2;	/* scaled inverse-slope of the 2nd segment */
};

/* runtime service curve */
struct runtime_sc
{
	u64	x;	/* current starting position on x-axis */
	u64	y;	/* current starting position on y-axis */
	u64	sm1;	/* scaled slope of the 1st segment */
	u64	ism1;	/* scaled inverse-slope of the 1st segment */
	u64	dx;	/* the x-projection of the 1st segment */
	u64	dy;	/* the y-projection of the 1st segment */
	u64	sm2;	/* scaled slope of the 2nd segment */
	u64	ism2;	/* scaled inverse-slope of the 2nd segment */
};

enum hfsc_class_flags
{
	HFSC_RSC = 0x1,
	HFSC_FSC = 0x2,
	HFSC_USC = 0x4
};

struct hfsc_class
{
	u32		classid;	/* class id */
	unsigned int	refcnt;		/* usage count */

	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	spinlock_t	*stats_lock;
	unsigned int	level;		/* class level in hierarchy */
	struct tcf_proto *filter_list;	/* filter list */
	unsigned int	filter_cnt;	/* filter count */

	struct hfsc_sched *sched;	/* scheduler data */
	struct hfsc_class *cl_parent;	/* parent class */
	struct list_head siblings;	/* sibling classes */
	struct list_head children;	/* child classes */
	struct Qdisc	*qdisc;		/* leaf qdisc */

	struct rb_node el_node;		/* qdisc's eligible tree member */
	struct rb_root vt_tree;		/* active children sorted by cl_vt */
	struct rb_node vt_node;		/* parent's vt_tree member */
	struct rb_root cf_tree;		/* active children sorted by cl_f */
	struct rb_node cf_node;		/* parent's cf_heap member */
	struct list_head hlist;		/* hash list member */
	struct list_head dlist;		/* drop list member */

	u64	cl_total;		/* total work in bytes */
	u64	cl_cumul;		/* cumulative work in bytes done by
					   real-time criteria */

	u64	cl_d;			/* deadline */
	u64	cl_e;			/* eligible time */
	u64	cl_vt;			/* virtual time */
	u64	cl_f;			/* time when this class will fit for
					   link-sharing, max(myf, cfmin) */
	u64	cl_myf;			/* my fit-time (calculated from this
					   class's own upperlimit curve) */
	u64	cl_myfadj;		/* my fit-time adjustment (to cancel
					   history dependence) */
	u64	cl_cfmin;		/* earliest children's fit-time (used
					   with cl_myf to obtain cl_f) */
	u64	cl_cvtmin;		/* minimal virtual time among the
					   children fit for link-sharing
					   (monotonic within a period) */
	u64	cl_vtadj;		/* intra-period cumulative vt
					   adjustment */
	u64	cl_vtoff;		/* inter-period cumulative vt offset */
	u64	cl_cvtmax;		/* max child's vt in the last period */
	u64	cl_cvtoff;		/* cumulative cvtmax of all periods */
	u64	cl_pcvtoff;		/* parent's cvtoff at initialization
					   time */

	struct internal_sc cl_rsc;	/* internal real-time service curve */
	struct internal_sc cl_fsc;	/* internal fair service curve */
	struct internal_sc cl_usc;	/* internal upperlimit service curve */
	struct runtime_sc cl_deadline;	/* deadline curve */
	struct runtime_sc cl_eligible;	/* eligible curve */
	struct runtime_sc cl_virtual;	/* virtual curve */
	struct runtime_sc cl_ulimit;	/* upperlimit curve */

	unsigned long	cl_flags;	/* which curves are valid */
	unsigned long	cl_vtperiod;	/* vt period sequence number */
	unsigned long	cl_parentperiod;/* parent's vt period sequence number*/
	unsigned long	cl_nactive;	/* number of active children */
};

#define HFSC_HSIZE	16

struct hfsc_sched
{
	u16	defcls;				/* default class id */
	struct hfsc_class root;			/* root class */
	struct list_head clhash[HFSC_HSIZE];	/* class hash */
	struct rb_root eligible;		/* eligible tree */
	struct list_head droplist;		/* active leaf class list (for
						   dropping) */
	struct sk_buff_head requeue;		/* requeued packet */
	struct timer_list wd_timer;		/* watchdog timer */
};

/*
 * macros
 */
#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
#include <linux/time.h>
#undef PSCHED_GET_TIME
#define PSCHED_GET_TIME(stamp)						\
do {									\
	struct timeval tv;						\
	do_gettimeofday(&tv);						\
	(stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec;		\
} while (0)
#endif

#if HFSC_DEBUG
#define ASSERT(cond)							\
do {									\
	if (unlikely(!(cond)))						\
		printk("assertion %s failed at %s:%i (%s)\n",		\
		       #cond, __FILE__, __LINE__, __FUNCTION__);	\
} while (0)
#else
#define ASSERT(cond)
#endif /* HFSC_DEBUG */

#define	HT_INFINITY	0xffffffffffffffffULL	/* infinite time value */


/*
 * eligible tree holds backlogged classes being sorted by their eligible times.
 * there is one eligible tree per hfsc instance.
 */

static void
eltree_insert(struct hfsc_class *cl)
{
	struct rb_node **p = &cl->sched->eligible.rb_node;
	struct rb_node *parent = NULL;
	struct hfsc_class *cl1;

	while (*p != NULL) {
		parent = *p;
		cl1 = rb_entry(parent, struct hfsc_class, el_node);
		if (cl->cl_e >= cl1->cl_e)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->el_node, parent, p);
	rb_insert_color(&cl->el_node, &cl->sched->eligible);
}

static inline void
eltree_remove(struct hfsc_class *cl)
{
	rb_erase(&cl->el_node, &cl->sched->eligible);
}

static inline void
eltree_update(struct hfsc_class *cl)
{
	eltree_remove(cl);
	eltree_insert(cl);
}

/* find the class with the minimum deadline among the eligible classes */
static inline struct hfsc_class *
eltree_get_mindl(struct hfsc_sched *q, u64 cur_time)
{
	struct hfsc_class *p, *cl = NULL;
	struct rb_node *n;

	for (n = rb_first(&q->eligible); n != NULL; n = rb_next(n)) {
		p = rb_entry(n, struct hfsc_class, el_node);
		if (p->cl_e > cur_time)
			break;
		if (cl == NULL || p->cl_d < cl->cl_d)
			cl = p;
	}
	return cl;
}

/* find the class with minimum eligible time among the eligible classes */
static inline struct hfsc_class *
eltree_get_minel(struct hfsc_sched *q)
{
	struct rb_node *n;

	n = rb_first(&q->eligible);
	if (n == NULL)
		return NULL;
	return rb_entry(n, struct hfsc_class, el_node);
}

/*
 * vttree holds backlogged child classes being sorted by their virtual
 * time. each intermediate class has one vttree.
 */
static void
vttree_insert(struct hfsc_class *cl)
{
	struct rb_node **p = &cl->cl_parent->vt_tree.rb_node;
	struct rb_node *parent = NULL;
	struct hfsc_class *cl1;

	while (*p != NULL) {
		parent = *p;
		cl1 = rb_entry(parent, struct hfsc_class, vt_node);
		if (cl->cl_vt >= cl1->cl_vt)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->vt_node, parent, p);
	rb_insert_color(&cl->vt_node, &cl->cl_parent->vt_tree);
}

static inline void
vttree_remove(struct hfsc_class *cl)
{
	rb_erase(&cl->vt_node, &cl->cl_parent->vt_tree);
}

static inline void
vttree_update(struct hfsc_class *cl)
{
	vttree_remove(cl);
	vttree_insert(cl);
}

static inline struct hfsc_class *
vttree_firstfit(struct hfsc_class *cl, u64 cur_time)
{
	struct hfsc_class *p;
	struct rb_node *n;

	for (n = rb_first(&cl->vt_tree); n != NULL; n = rb_next(n)) {
		p = rb_entry(n, struct hfsc_class, vt_node);
		if (p->cl_f <= cur_time)
			return p;
	}
	return NULL;
}

/*
 * get the leaf class with the minimum vt in the hierarchy
 */
static struct hfsc_class *
vttree_get_minvt(struct hfsc_class *cl, u64 cur_time)
{
	/* if root-class's cfmin is bigger than cur_time nothing to do */
	if (cl->cl_cfmin > cur_time)
		return NULL;

	while (cl->level > 0) {
		cl = vttree_firstfit(cl, cur_time);
		if (cl == NULL)
			return NULL;
		/*
		 * update parent's cl_cvtmin.
		 */
		if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
			cl->cl_parent->cl_cvtmin = cl->cl_vt;
	}
	return cl;
}

static void
cftree_insert(struct hfsc_class *cl)
{
	struct rb_node **p = &cl->cl_parent->cf_tree.rb_node;
	struct rb_node *parent = NULL;
	struct hfsc_class *cl1;

	while (*p != NULL) {
		parent = *p;
		cl1 = rb_entry(parent, struct hfsc_class, cf_node);
		if (cl->cl_f >= cl1->cl_f)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->cf_node, parent, p);
	rb_insert_color(&cl->cf_node, &cl->cl_parent->cf_tree);
}

static inline void
cftree_remove(struct hfsc_class *cl)
{
	rb_erase(&cl->cf_node, &cl->cl_parent->cf_tree);
}

static inline void
cftree_update(struct hfsc_class *cl)
{
	cftree_remove(cl);
	cftree_insert(cl);
}

/*
 * service curve support functions
 *
 *  external service curve parameters
 *	m: bps
 *	d: us
 *  internal service curve parameters
 *	sm: (bytes/psched_us) << SM_SHIFT
 *	ism: (psched_us/byte) << ISM_SHIFT
 *	dx: psched_us
 *
 * Clock source resolution (CONFIG_NET_SCH_CLK_*)
 *  JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us.
 *  CPU: resolution is between 0.5us and 1us.
 *  GETTIMEOFDAY: resolution is exactly 1us.
 *
 * sm and ism are scaled in order to keep effective digits.
 * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
 * digits in decimal using the following table.
 *
 * Note: We can afford the additional accuracy (altq hfsc keeps at most
 * 3 effective digits) thanks to the fact that linux clock is bounded
 * much more tightly.
 *
 *  bits/sec      100Kbps    1Mbps      10Mbps     100Mbps    1Gbps
 *  ------------+-------------------------------------------------------
 *  bytes/0.5us   6.25e-3    62.5e-3    625e-3     6250e-3    62500e-3
 *  bytes/us      12.5e-3    125e-3     1250e-3    12500e-3   125000e-3
 *  bytes/1.27us  15.875e-3  158.75e-3  1587.5e-3  15875e-3   158750e-3
 *
 *  0.5us/byte    160        16         1.6        0.16       0.016
 *  us/byte       80         8          0.8        0.08       0.008
 *  1.27us/byte   63         6.3        0.63       0.063      0.0063
 */
#define	SM_SHIFT	20
#define	ISM_SHIFT	18

#define	SM_MASK		((1ULL << SM_SHIFT) - 1)
#define	ISM_MASK	((1ULL << ISM_SHIFT) - 1)
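/*
 * Worked example of the scaling, assuming the GETTIMEOFDAY clock source
 * (one psched tick = 1us, so PSCHED_JIFFIE2US(HZ) = 1000000): a rate of
 * 125000 bytes/s (1 Mbit/s) is 0.125 bytes/us, which m2sm() stores as
 * 0.125 << 20 = 131072, and m2ism() stores the inverse, 8 us/byte << 18
 * = 2097152.  The "+ divisor - 1" terms in the conversions below
 * implement ceiling division, i.e. the scaled values round up rather
 * than truncate.
 */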
static inline u64
seg_x2y(u64 x, u64 sm)
{
	u64 y;

	/*
	 * compute
	 *	y = x * sm >> SM_SHIFT
	 * but divide it for the upper and lower bits to avoid overflow
	 */
	y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
	return y;
}

static inline u64
seg_y2x(u64 y, u64 ism)
{
	u64 x;

	if (y == 0)
		x = 0;
	else if (ism == HT_INFINITY)
		x = HT_INFINITY;
	else {
		x = (y >> ISM_SHIFT) * ism
		    + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
	}
	return x;
}

/* convert m (bps) into sm (bytes/psched us) */
static u64
m2sm(u32 m)
{
	u64 sm;

	sm = ((u64)m << SM_SHIFT);
	sm += PSCHED_JIFFIE2US(HZ) - 1;
	do_div(sm, PSCHED_JIFFIE2US(HZ));
	return sm;
}

/* convert m (bps) into ism (psched us/byte) */
static u64
m2ism(u32 m)
{
	u64 ism;

	if (m == 0)
		ism = HT_INFINITY;
	else {
		ism = ((u64)PSCHED_JIFFIE2US(HZ) << ISM_SHIFT);
		ism += m - 1;
		do_div(ism, m);
	}
	return ism;
}

/* convert d (us) into dx (psched us) */
static u64
d2dx(u32 d)
{
	u64 dx;

	dx = ((u64)d * PSCHED_JIFFIE2US(HZ));
	dx += USEC_PER_SEC - 1;
	do_div(dx, USEC_PER_SEC);
	return dx;
}

/* convert sm (bytes/psched us) into m (bps) */
static u32
sm2m(u64 sm)
{
	u64 m;

	m = (sm * PSCHED_JIFFIE2US(HZ)) >> SM_SHIFT;
	return (u32)m;
}

/* convert dx (psched us) into d (us) */
static u32
dx2d(u64 dx)
{
	u64 d;

	d = dx * USEC_PER_SEC;
	do_div(d, PSCHED_JIFFIE2US(HZ));
	return (u32)d;
}

static void
sc2isc(struct tc_service_curve *sc, struct internal_sc *isc)
{
	isc->sm1 = m2sm(sc->m1);
	isc->ism1 = m2ism(sc->m1);
	isc->dx = d2dx(sc->d);
	isc->dy = seg_x2y(isc->dx, isc->sm1);
	isc->sm2 = m2sm(sc->m2);
	isc->ism2 = m2ism(sc->m2);
}

/*
 * initialize the runtime service curve with the given internal
 * service curve starting at (x, y).
 */
static void
rtsc_init(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
{
	rtsc->x = x;
	rtsc->y = y;
	rtsc->sm1 = isc->sm1;
	rtsc->ism1 = isc->ism1;
	rtsc->dx = isc->dx;
	rtsc->dy = isc->dy;
	rtsc->sm2 = isc->sm2;
	rtsc->ism2 = isc->ism2;
}

/*
 * calculate the x-projection of the runtime service curve for the
 * given y-projection (amount of work) value
 */
static u64
rtsc_y2x(struct runtime_sc *rtsc, u64 y)
{
	u64 x;

	if (y < rtsc->y)
		x = rtsc->x;
	else if (y <= rtsc->y + rtsc->dy) {
		/* x belongs to the 1st segment */
		if (rtsc->dy == 0)
			x = rtsc->x + rtsc->dx;
		else
			x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
	} else {
		/* x belongs to the 2nd segment */
		x = rtsc->x + rtsc->dx
		    + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
	}
	return x;
}

static u64
rtsc_x2y(struct runtime_sc *rtsc, u64 x)
{
	u64 y;

	if (x <= rtsc->x)
		y = rtsc->y;
	else if (x <= rtsc->x + rtsc->dx)
		/* y belongs to the 1st segment */
		y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
	else
		/* y belongs to the 2nd segment */
		y = rtsc->y + rtsc->dy
		    + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
	return y;
}

/*
 * update the runtime service curve by taking the minimum of the current
 * runtime service curve and the service curve starting at (x, y).
 */
static void
rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
{
	u64 y1, y2, dx, dy;
	u32 dsm;

	if (isc->sm1 <= isc->sm2) {
		/* service curve is convex */
		y1 = rtsc_x2y(rtsc, x);
		if (y1 < y)
			/* the current rtsc is smaller */
			return;
		rtsc->x = x;
		rtsc->y = y;
		return;
	}

	/*
	 * service curve is concave
	 * compute the two y values of the current rtsc
	 *	y1: at x
	 *	y2: at (x + dx)
	 */
	y1 = rtsc_x2y(rtsc, x);
	if (y1 <= y) {
		/* rtsc is below isc, no change to rtsc */
		return;
	}

	y2 = rtsc_x2y(rtsc, x + isc->dx);
	if (y2 >= y + isc->dy) {
		/* rtsc is above isc, replace rtsc by isc */
		rtsc->x = x;
		rtsc->y = y;
		rtsc->dx = isc->dx;
		rtsc->dy = isc->dy;
		return;
	}

	/*
	 * the two curves intersect
	 * compute the offsets (dx, dy) using the reverse
	 * function of seg_x2y()
	 *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
	 */
	dx = (y1 - y) << SM_SHIFT;
	dsm = isc->sm1 - isc->sm2;
	do_div(dx, dsm);
	/*
	 * check if (x, y1) belongs to the 1st segment of rtsc.
	 * if so, add the offset.
	 */
	if (rtsc->x + rtsc->dx > x)
		dx += rtsc->x + rtsc->dx - x;
	dy = seg_x2y(dx, isc->sm1);

	rtsc->x = x;
	rtsc->y = y;
	rtsc->dx = dx;
	rtsc->dy = dy;
	return;
}
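/*
 * A rough picture of the intersection case in rtsc_min(), ignoring the
 * SM_SHIFT scaling: at x the old curve lies y1 - y bytes above the new
 * one, and the new curve's first segment (slope sm1) gains on the old
 * curve (slope sm2 beyond its knee) at sm1 - sm2 bytes per tick, so the
 * two cross dx = (y1 - y) / (sm1 - sm2) ticks after x; e.g. a 3-byte
 * gap closed at 1 byte per tick gives dx = 3 ticks.
 */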
static void
init_ed(struct hfsc_class *cl, unsigned int next_len)
{
	u64 cur_time;

	PSCHED_GET_TIME(cur_time);

	/* update the deadline curve */
	rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);

	/*
	 * update the eligible curve.
	 * for concave, it is equal to the deadline curve.
	 * for convex, it is a linear curve with slope m2.
	 */
	cl->cl_eligible = cl->cl_deadline;
	if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) {
		cl->cl_eligible.dx = 0;
		cl->cl_eligible.dy = 0;
	}

	/* compute e and d */
	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	eltree_insert(cl);
}

static void
update_ed(struct hfsc_class *cl, unsigned int next_len)
{
	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	eltree_update(cl);
}

static inline void
update_d(struct hfsc_class *cl, unsigned int next_len)
{
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
}

static inline void
update_cfmin(struct hfsc_class *cl)
{
	struct rb_node *n = rb_first(&cl->cf_tree);
	struct hfsc_class *p;

	if (n == NULL) {
		cl->cl_cfmin = 0;
		return;
	}
	p = rb_entry(n, struct hfsc_class, cf_node);
	cl->cl_cfmin = p->cl_f;
}

static void
init_vf(struct hfsc_class *cl, unsigned int len)
{
	struct hfsc_class *max_cl;
	struct rb_node *n;
	u64 vt, f, cur_time;
	int go_active;

	cur_time = 0;
	go_active = 1;
	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
		if (go_active && cl->cl_nactive++ == 0)
			go_active = 1;
		else
			go_active = 0;

		if (go_active) {
			n = rb_last(&cl->cl_parent->vt_tree);
			if (n != NULL) {
				max_cl = rb_entry(n, struct hfsc_class, vt_node);
				/*
				 * set vt to the average of the min and max
				 * classes. if the parent's period didn't
				 * change, don't decrease vt of the class.
				 */
				vt = max_cl->cl_vt;
				if (cl->cl_parent->cl_cvtmin != 0)
					vt = (cl->cl_parent->cl_cvtmin + vt)/2;

				if (cl->cl_parent->cl_vtperiod !=
				    cl->cl_parentperiod || vt > cl->cl_vt)
					cl->cl_vt = vt;
			} else {
				/*
				 * first child for a new parent backlog period.
				 * add parent's cvtmax to cvtoff to make a new
				 * vt (vtoff + vt) larger than the vt in the
				 * last period for all children.
				 */
				vt = cl->cl_parent->cl_cvtmax;
				cl->cl_parent->cl_cvtoff += vt;
				cl->cl_parent->cl_cvtmax = 0;
				cl->cl_parent->cl_cvtmin = 0;
				cl->cl_vt = 0;
			}

			cl->cl_vtoff = cl->cl_parent->cl_cvtoff -
							cl->cl_pcvtoff;

			/* update the virtual curve */
			vt = cl->cl_vt + cl->cl_vtoff;
			rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt,
						      cl->cl_total);
			if (cl->cl_virtual.x == vt) {
				cl->cl_virtual.x -= cl->cl_vtoff;
				cl->cl_vtoff = 0;
			}
			cl->cl_vtadj = 0;

			cl->cl_vtperiod++;  /* increment vt period */
			cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
			if (cl->cl_parent->cl_nactive == 0)
				cl->cl_parentperiod++;
			cl->cl_f = 0;

			vttree_insert(cl);
			cftree_insert(cl);

			if (cl->cl_flags & HFSC_USC) {
				/* class has upper limit curve */
				if (cur_time == 0)
					PSCHED_GET_TIME(cur_time);

				/* update the ulimit curve */
				rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time,
					 cl->cl_total);
				/* compute myf */
				cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
						      cl->cl_total);
				cl->cl_myfadj = 0;
			}
		}

		f = max(cl->cl_myf, cl->cl_cfmin);
		if (f != cl->cl_f) {
			cl->cl_f = f;
			cftree_update(cl);
			update_cfmin(cl->cl_parent);
		}
	}
}

static void
update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
{
	u64 f; /* , myf_bound, delta; */
	int go_passive = 0;

	if (cl->qdisc->q.qlen == 0 && cl->cl_flags & HFSC_FSC)
		go_passive = 1;

	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
		cl->cl_total += len;

		if (!(cl->cl_flags & HFSC_FSC) || cl->cl_nactive == 0)
			continue;

		if (go_passive && --cl->cl_nactive == 0)
			go_passive = 1;
		else
			go_passive = 0;

		if (go_passive) {
			/* no more active child, going passive */

			/* update cvtmax of the parent class */
			if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
				cl->cl_parent->cl_cvtmax = cl->cl_vt;

			/* remove this class from the vt tree */
			vttree_remove(cl);

			cftree_remove(cl);
			update_cfmin(cl->cl_parent);

			continue;
		}

		/*
		 * update vt and f
		 */
		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
			    - cl->cl_vtoff + cl->cl_vtadj;

		/*
		 * if vt of the class is smaller than cvtmin,
		 * the class was skipped in the past due to non-fit.
		 * if so, we need to adjust vtadj.
		 */
		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
			cl->cl_vt = cl->cl_parent->cl_cvtmin;
		}

		/* update the vt tree */
		vttree_update(cl);

		if (cl->cl_flags & HFSC_USC) {
			cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit,
							      cl->cl_total);
#if 0
			/*
			 * This code causes classes to stay way under their
			 * limit when multiple classes are used at gigabit
			 * speed. needs investigation. -kaber
			 */
			/*
			 * if myf lags behind by more than one clock tick
			 * from the current time, adjust myfadj to prevent
			 * a rate-limited class from going greedy.
			 * in a steady state under rate-limiting, myf
			 * fluctuates within one clock tick.
			 */
			myf_bound = cur_time - PSCHED_JIFFIE2US(1);
			if (cl->cl_myf < myf_bound) {
				delta = cur_time - cl->cl_myf;
				cl->cl_myfadj += delta;
				cl->cl_myf += delta;
			}
#endif
		}

		f = max(cl->cl_myf, cl->cl_cfmin);
		if (f != cl->cl_f) {
			cl->cl_f = f;
			cftree_update(cl);
			update_cfmin(cl->cl_parent);
		}
	}
}

static void
set_active(struct hfsc_class *cl, unsigned int len)
{
	if (cl->cl_flags & HFSC_RSC)
		init_ed(cl, len);
	if (cl->cl_flags & HFSC_FSC)
		init_vf(cl, len);

	list_add_tail(&cl->dlist, &cl->sched->droplist);
}

static void
set_passive(struct hfsc_class *cl)
{
	if (cl->cl_flags & HFSC_RSC)
		eltree_remove(cl);

	list_del(&cl->dlist);

	/*
	 * vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
	 * needs to be called explicitly to remove a class from vttree.
	 */
}

/*
 * hack to get length of first packet in queue.
 */
static unsigned int
qdisc_peek_len(struct Qdisc *sch)
{
	struct sk_buff *skb;
	unsigned int len;

	skb = sch->dequeue(sch);
	if (skb == NULL) {
		if (net_ratelimit())
			printk("qdisc_peek_len: non work-conserving qdisc ?\n");
		return 0;
	}
	len = skb->len;
	if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) {
		if (net_ratelimit())
			printk("qdisc_peek_len: failed to requeue\n");
		return 0;
	}
	return len;
}

static void
hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
{
	unsigned int len = cl->qdisc->q.qlen;

	qdisc_reset(cl->qdisc);
	if (len > 0) {
		update_vf(cl, 0, 0);
		set_passive(cl);
		sch->q.qlen -= len;
	}
}

static void
hfsc_adjust_levels(struct hfsc_class *cl)
{
	struct hfsc_class *p;
	unsigned int level;

	do {
		level = 0;
		list_for_each_entry(p, &cl->children, siblings) {
			if (p->level > level)
				level = p->level;
		}
		cl->level = level + 1;
	} while ((cl = cl->cl_parent) != NULL);
}

static inline unsigned int
hfsc_hash(u32 h)
{
	h ^= h >> 8;
	h ^= h >> 4;

	return h & (HFSC_HSIZE - 1);
}

static inline struct hfsc_class *
hfsc_find_class(u32 classid, struct Qdisc *sch)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl;

	list_for_each_entry(cl, &q->clhash[hfsc_hash(classid)], hlist) {
		if (cl->classid == classid)
			return cl;
	}
	return NULL;
}

static void
hfsc_change_rsc(struct hfsc_class *cl, struct tc_service_curve *rsc,
                u64 cur_time)
{
	sc2isc(rsc, &cl->cl_rsc);
	rtsc_init(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
	cl->cl_eligible = cl->cl_deadline;
	if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) {
		cl->cl_eligible.dx = 0;
		cl->cl_eligible.dy = 0;
	}
	cl->cl_flags |= HFSC_RSC;
}

static void
hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc)
{
	sc2isc(fsc, &cl->cl_fsc);
	rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
	cl->cl_flags |= HFSC_FSC;
}

static void
hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc,
                u64 cur_time)
{
	sc2isc(usc, &cl->cl_usc);
	rtsc_init(&cl->cl_ulimit, &cl->cl_usc, cur_time, cl->cl_total);
	cl->cl_flags |= HFSC_USC;
}
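/*
 * The three change helpers above map directly onto the scheduling
 * criteria: HFSC_RSC drives the eligible/deadline curves (real-time
 * service), HFSC_FSC drives the virtual-time curve (link-sharing), and
 * HFSC_USC drives the fit-time curve that caps link-sharing (the
 * upperlimit described at the top of this file).
 */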
static int
hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
                  struct rtattr **tca, unsigned long *arg)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl = (struct hfsc_class *)*arg;
	struct hfsc_class *parent = NULL;
	struct rtattr *opt = tca[TCA_OPTIONS-1];
	struct rtattr *tb[TCA_HFSC_MAX];
	struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL;
	u64 cur_time;

	if (opt == NULL || rtattr_parse_nested(tb, TCA_HFSC_MAX, opt))
		return -EINVAL;

	if (tb[TCA_HFSC_RSC-1]) {
		if (RTA_PAYLOAD(tb[TCA_HFSC_RSC-1]) < sizeof(*rsc))
			return -EINVAL;
		rsc = RTA_DATA(tb[TCA_HFSC_RSC-1]);
		if (rsc->m1 == 0 && rsc->m2 == 0)
			rsc = NULL;
	}

	if (tb[TCA_HFSC_FSC-1]) {
		if (RTA_PAYLOAD(tb[TCA_HFSC_FSC-1]) < sizeof(*fsc))
			return -EINVAL;
		fsc = RTA_DATA(tb[TCA_HFSC_FSC-1]);
		if (fsc->m1 == 0 && fsc->m2 == 0)
			fsc = NULL;
	}

	if (tb[TCA_HFSC_USC-1]) {
		if (RTA_PAYLOAD(tb[TCA_HFSC_USC-1]) < sizeof(*usc))
			return -EINVAL;
		usc = RTA_DATA(tb[TCA_HFSC_USC-1]);
		if (usc->m1 == 0 && usc->m2 == 0)
			usc = NULL;
	}

	if (cl != NULL) {
		if (parentid) {
			if (cl->cl_parent && cl->cl_parent->classid != parentid)
				return -EINVAL;
			if (cl->cl_parent == NULL && parentid != TC_H_ROOT)
				return -EINVAL;
		}
		PSCHED_GET_TIME(cur_time);

		sch_tree_lock(sch);
		if (rsc != NULL)
			hfsc_change_rsc(cl, rsc, cur_time);
		if (fsc != NULL)
			hfsc_change_fsc(cl, fsc);
		if (usc != NULL)
			hfsc_change_usc(cl, usc, cur_time);

		if (cl->qdisc->q.qlen != 0) {
			if (cl->cl_flags & HFSC_RSC)
				update_ed(cl, qdisc_peek_len(cl->qdisc));
			if (cl->cl_flags & HFSC_FSC)
				update_vf(cl, 0, cur_time);
		}
		sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
		if (tca[TCA_RATE-1])
			gen_replace_estimator(&cl->bstats, &cl->rate_est,
				cl->stats_lock, tca[TCA_RATE-1]);
#endif
		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EEXIST;

	parent = &q->root;
	if (parentid) {
		parent = hfsc_find_class(parentid, sch);
		if (parent == NULL)
			return -ENOENT;
	}

	if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
		return -EINVAL;
	if (hfsc_find_class(classid, sch))
		return -EEXIST;

	if (rsc == NULL && fsc == NULL)
		return -EINVAL;

	cl = kmalloc(sizeof(struct hfsc_class), GFP_KERNEL);
	if (cl == NULL)
		return -ENOBUFS;
	memset(cl, 0, sizeof(struct hfsc_class));

	if (rsc != NULL)
		hfsc_change_rsc(cl, rsc, 0);
	if (fsc != NULL)
		hfsc_change_fsc(cl, fsc);
	if (usc != NULL)
		hfsc_change_usc(cl, usc, 0);

	cl->refcnt = 1;
	cl->classid = classid;
	cl->sched = q;
	cl->cl_parent = parent;
	cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
	if (cl->qdisc == NULL)
		cl->qdisc = &noop_qdisc;
	cl->stats_lock = &sch->dev->queue_lock;
	INIT_LIST_HEAD(&cl->children);
	cl->vt_tree = RB_ROOT;
	cl->cf_tree = RB_ROOT;

	sch_tree_lock(sch);
	list_add_tail(&cl->hlist, &q->clhash[hfsc_hash(classid)]);
	list_add_tail(&cl->siblings, &parent->children);
	if (parent->level == 0)
		hfsc_purge_queue(sch, parent);
	hfsc_adjust_levels(parent);
	cl->cl_pcvtoff = parent->cl_cvtoff;
	sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1])
		gen_new_estimator(&cl->bstats, &cl->rate_est,
			cl->stats_lock, tca[TCA_RATE-1]);
#endif
	*arg = (unsigned long)cl;
	return 0;
}

static void
hfsc_destroy_filters(struct tcf_proto **fl)
{
	struct tcf_proto *tp;

	while ((tp = *fl) != NULL) {
		*fl = tp->next;
		tcf_destroy(tp);
	}
}

static void
hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
{
	struct hfsc_sched *q = qdisc_priv(sch);

	hfsc_destroy_filters(&cl->filter_list);
	qdisc_destroy(cl->qdisc);
#ifdef CONFIG_NET_ESTIMATOR
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
#endif
	if (cl != &q->root)
		kfree(cl);
}

static int
hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl = (struct hfsc_class *)arg;

	if (cl->level > 0 || cl->filter_cnt > 0 || cl == &q->root)
		return -EBUSY;

	sch_tree_lock(sch);

	list_del(&cl->hlist);
	list_del(&cl->siblings);
	hfsc_adjust_levels(cl->cl_parent);
	hfsc_purge_queue(sch, cl);
	if (--cl->refcnt == 0)
		hfsc_destroy_class(sch, cl);

	sch_tree_unlock(sch);
	return 0;
}

static struct hfsc_class *
hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	if (TC_H_MAJ(skb->priority ^ sch->handle) == 0 &&
	    (cl = hfsc_find_class(skb->priority, sch)) != NULL)
		if (cl->level == 0)
			return cl;

	*qerr = NET_XMIT_BYPASS;
	tcf = q->root.filter_list;
	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS;
		case TC_ACT_SHOT:
			return NULL;
		}
#elif defined(CONFIG_NET_CLS_POLICE)
		if (result == TC_POLICE_SHOT)
			return NULL;
#endif
		if ((cl = (struct hfsc_class *)res.class) == NULL) {
			if ((cl = hfsc_find_class(res.classid, sch)) == NULL)
				break; /* filter selected invalid classid */
		}

		if (cl->level == 0)
			return cl; /* hit leaf class */

		/* apply inner filter chain */
		tcf = cl->filter_list;
	}

	/* classification failed, try default class */
	cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (cl == NULL || cl->level > 0)
		return NULL;

	return cl;
}

static int
hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                 struct Qdisc **old)
{
	struct hfsc_class *cl = (struct hfsc_class *)arg;

	if (cl == NULL)
		return -ENOENT;
	if (cl->level > 0)
		return -EINVAL;
	if (new == NULL) {
		new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
		if (new == NULL)
			new = &noop_qdisc;
	}

	sch_tree_lock(sch);
	hfsc_purge_queue(sch, cl);
	*old = xchg(&cl->qdisc, new);
	sch_tree_unlock(sch);
	return 0;
}

static struct Qdisc *
hfsc_class_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct hfsc_class *cl = (struct hfsc_class *)arg;

	if (cl != NULL && cl->level == 0)
		return cl->qdisc;

	return NULL;
}

static unsigned long
hfsc_get_class(struct Qdisc *sch, u32 classid)
{
	struct hfsc_class *cl = hfsc_find_class(classid, sch);

	if (cl != NULL)
		cl->refcnt++;

	return (unsigned long)cl;
}

static void
hfsc_put_class(struct Qdisc *sch, unsigned long arg)
{
	struct hfsc_class *cl = (struct hfsc_class *)arg;

	if (--cl->refcnt == 0)
		hfsc_destroy_class(sch, cl);
}

static unsigned long
hfsc_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid)
{
	struct hfsc_class *p = (struct hfsc_class *)parent;
	struct hfsc_class *cl = hfsc_find_class(classid, sch);

	if (cl != NULL) {
		if (p != NULL && p->level <= cl->level)
			return 0;
		cl->filter_cnt++;
	}

	return (unsigned long)cl;
}

static void
hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct hfsc_class *cl = (struct hfsc_class *)arg;

	cl->filter_cnt--;
}

static struct tcf_proto **
hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl = (struct hfsc_class *)arg;

	if (cl == NULL)
		cl = &q->root;

	return &cl->filter_list;
}

static int
hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
{
	struct tc_service_curve tsc;

	tsc.m1 = sm2m(sc->sm1);
	tsc.d = dx2d(sc->dx);
	tsc.m2 = sm2m(sc->sm2);
	RTA_PUT(skb, attr, sizeof(tsc), &tsc);

	return skb->len;

 rtattr_failure:
	return -1;
}

static inline int
hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
{
	if ((cl->cl_flags & HFSC_RSC) &&
	    (hfsc_dump_sc(skb, TCA_HFSC_RSC, &cl->cl_rsc) < 0))
		goto rtattr_failure;

	if ((cl->cl_flags & HFSC_FSC) &&
	    (hfsc_dump_sc(skb, TCA_HFSC_FSC, &cl->cl_fsc) < 0))
		goto rtattr_failure;

	if ((cl->cl_flags & HFSC_USC) &&
	    (hfsc_dump_sc(skb, TCA_HFSC_USC, &cl->cl_usc) < 0))
		goto rtattr_failure;

	return skb->len;

 rtattr_failure:
	return -1;
}

static int
hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
                struct tcmsg *tcm)
{
	struct hfsc_class *cl = (struct hfsc_class *)arg;
	unsigned char *b = skb->tail;
	struct rtattr *rta = (struct rtattr *)b;

	tcm->tcm_parent = cl->cl_parent ?
			  cl->cl_parent->classid : TC_H_ROOT;
	tcm->tcm_handle = cl->classid;
	if (cl->level == 0)
		tcm->tcm_info = cl->qdisc->handle;

	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (hfsc_dump_curves(skb, cl) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;
	return skb->len;

 rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int
hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
	struct gnet_dump *d)
{
	struct hfsc_class *cl = (struct hfsc_class *)arg;
	struct tc_hfsc_stats xstats;

	cl->qstats.qlen = cl->qdisc->q.qlen;
	xstats.level = cl->level;
	xstats.period = cl->cl_vtperiod;
	xstats.work = cl->cl_total;
	xstats.rtwork = cl->cl_cumul;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
#ifdef CONFIG_NET_ESTIMATOR
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
#endif
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}



static void
hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < HFSC_HSIZE; i++) {
		list_for_each_entry(cl, &q->clhash[i], hlist) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static void
hfsc_watchdog(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}

static void
hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl;
	u64 next_time = 0;
	long delay;

	if ((cl = eltree_get_minel(q)) != NULL)
		next_time = cl->cl_e;
	if (q->root.cl_cfmin != 0) {
		if (next_time == 0 || next_time > q->root.cl_cfmin)
			next_time = q->root.cl_cfmin;
	}
	ASSERT(next_time != 0);
	delay = next_time - cur_time;
	delay = PSCHED_US2JIFFIE(delay);

	sch->flags |= TCQ_F_THROTTLED;
	mod_timer(&q->wd_timer, jiffies + delay);
}

static int
hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct tc_hfsc_qopt *qopt;
	unsigned int i;

	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
		return -EINVAL;
	qopt = RTA_DATA(opt);

	sch->stats_lock = &sch->dev->queue_lock;

	q->defcls = qopt->defcls;
	for (i = 0; i < HFSC_HSIZE; i++)
		INIT_LIST_HEAD(&q->clhash[i]);
	q->eligible = RB_ROOT;
	INIT_LIST_HEAD(&q->droplist);
	skb_queue_head_init(&q->requeue);

	q->root.refcnt = 1;
	q->root.classid = sch->handle;
	q->root.sched = q;
	q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
	if (q->root.qdisc == NULL)
		q->root.qdisc = &noop_qdisc;
	q->root.stats_lock = &sch->dev->queue_lock;
	INIT_LIST_HEAD(&q->root.children);
	q->root.vt_tree = RB_ROOT;
	q->root.cf_tree = RB_ROOT;

	list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]);

	init_timer(&q->wd_timer);
	q->wd_timer.function = hfsc_watchdog;
	q->wd_timer.data = (unsigned long)sch;

	return 0;
}

static int
hfsc_change_qdisc(struct Qdisc *sch, struct rtattr *opt)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct tc_hfsc_qopt *qopt;

	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
		return -EINVAL;
	qopt = RTA_DATA(opt);

	sch_tree_lock(sch);
	q->defcls = qopt->defcls;
	sch_tree_unlock(sch);

	return 0;
}

static void
hfsc_reset_class(struct hfsc_class *cl)
{
	cl->cl_total = 0;
	cl->cl_cumul = 0;
	cl->cl_d = 0;
	cl->cl_e = 0;
	cl->cl_vt = 0;
	cl->cl_vtadj = 0;
	cl->cl_vtoff = 0;
	cl->cl_cvtmin = 0;
	cl->cl_cvtmax = 0;
	cl->cl_cvtoff = 0;
	cl->cl_pcvtoff = 0;
	cl->cl_vtperiod = 0;
	cl->cl_parentperiod = 0;
	cl->cl_f = 0;
	cl->cl_myf = 0;
	cl->cl_myfadj = 0;
	cl->cl_cfmin = 0;
	cl->cl_nactive = 0;

	cl->vt_tree = RB_ROOT;
	cl->cf_tree = RB_ROOT;
	qdisc_reset(cl->qdisc);

	if (cl->cl_flags & HFSC_RSC)
		rtsc_init(&cl->cl_deadline, &cl->cl_rsc, 0, 0);
	if (cl->cl_flags & HFSC_FSC)
		rtsc_init(&cl->cl_virtual, &cl->cl_fsc, 0, 0);
	if (cl->cl_flags & HFSC_USC)
		rtsc_init(&cl->cl_ulimit, &cl->cl_usc, 0, 0);
}

static void
hfsc_reset_qdisc(struct Qdisc *sch)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl;
	unsigned int i;

	for (i = 0; i < HFSC_HSIZE; i++) {
		list_for_each_entry(cl, &q->clhash[i], hlist)
			hfsc_reset_class(cl);
	}
	__skb_queue_purge(&q->requeue);
	q->eligible = RB_ROOT;
	INIT_LIST_HEAD(&q->droplist);
	del_timer(&q->wd_timer);
	sch->flags &= ~TCQ_F_THROTTLED;
	sch->q.qlen = 0;
}

static void
hfsc_destroy_qdisc(struct Qdisc *sch)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl, *next;
	unsigned int i;

	for (i = 0; i < HFSC_HSIZE; i++) {
		list_for_each_entry_safe(cl, next, &q->clhash[i], hlist)
			hfsc_destroy_class(sch, cl);
	}
	__skb_queue_purge(&q->requeue);
	del_timer(&q->wd_timer);
}

static int
hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	unsigned char *b = skb->tail;
	struct tc_hfsc_qopt qopt;

	qopt.defcls = q->defcls;
	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
	return skb->len;

 rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int
hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct hfsc_class *cl;
	unsigned int len;
	int err;

	cl = hfsc_classify(skb, sch, &err);
	if (cl == NULL) {
		if (err == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return err;
	}

	len = skb->len;
	err = cl->qdisc->enqueue(skb, cl->qdisc);
	if (unlikely(err != NET_XMIT_SUCCESS)) {
		cl->qstats.drops++;
		sch->qstats.drops++;
		return err;
	}

	if (cl->qdisc->q.qlen == 1)
		set_active(cl, len);

	cl->bstats.packets++;
	cl->bstats.bytes += len;
	sch->bstats.packets++;
	sch->bstats.bytes += len;
	sch->q.qlen++;

	return NET_XMIT_SUCCESS;
}

static struct sk_buff *
hfsc_dequeue(struct Qdisc *sch)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl;
	struct sk_buff *skb;
	u64 cur_time;
	unsigned int next_len;
	int realtime = 0;

	if (sch->q.qlen == 0)
		return NULL;
	if ((skb = __skb_dequeue(&q->requeue)))
		goto out;

	PSCHED_GET_TIME(cur_time);

	/*
	 * if there are eligible classes, use real-time criteria.
	 * find the class with the minimum deadline among
	 * the eligible classes.
	 */
	if ((cl = eltree_get_mindl(q, cur_time)) != NULL) {
		realtime = 1;
	} else {
		/*
		 * use link-sharing criteria
		 * get the class with the minimum vt in the hierarchy
		 */
		cl = vttree_get_minvt(&q->root, cur_time);
		if (cl == NULL) {
			sch->qstats.overlimits++;
			hfsc_schedule_watchdog(sch, cur_time);
			return NULL;
		}
	}

	skb = cl->qdisc->dequeue(cl->qdisc);
	if (skb == NULL) {
		if (net_ratelimit())
			printk("HFSC: Non-work-conserving qdisc ?\n");
		return NULL;
	}

	update_vf(cl, skb->len, cur_time);
	if (realtime)
		cl->cl_cumul += skb->len;

	if (cl->qdisc->q.qlen != 0) {
		if (cl->cl_flags & HFSC_RSC) {
			/* update ed */
			next_len = qdisc_peek_len(cl->qdisc);
			if (realtime)
				update_ed(cl, next_len);
			else
				update_d(cl, next_len);
		}
	} else {
		/* the class becomes passive */
		set_passive(cl);
	}

 out:
	sch->flags &= ~TCQ_F_THROTTLED;
	sch->q.qlen--;

	return skb;
}

static int
hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct hfsc_sched *q = qdisc_priv(sch);

	__skb_queue_head(&q->requeue, skb);
	sch->q.qlen++;
	sch->qstats.requeues++;
	return NET_XMIT_SUCCESS;
}

static unsigned int
hfsc_drop(struct Qdisc *sch)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl;
	unsigned int len;

	list_for_each_entry(cl, &q->droplist, dlist) {
		if (cl->qdisc->ops->drop != NULL &&
		    (len = cl->qdisc->ops->drop(cl->qdisc)) > 0) {
			if (cl->qdisc->q.qlen == 0) {
				update_vf(cl, 0, 0);
				set_passive(cl);
			} else {
				list_move_tail(&cl->dlist, &q->droplist);
			}
			cl->qstats.drops++;
			sch->qstats.drops++;
			sch->q.qlen--;
			return len;
		}
	}
	return 0;
}

static struct Qdisc_class_ops hfsc_class_ops = {
	.change		= hfsc_change_class,
	.delete		= hfsc_delete_class,
	.graft		= hfsc_graft_class,
	.leaf		= hfsc_class_leaf,
	.get		= hfsc_get_class,
	.put		= hfsc_put_class,
	.bind_tcf	= hfsc_bind_tcf,
	.unbind_tcf	= hfsc_unbind_tcf,
	.tcf_chain	= hfsc_tcf_chain,
	.dump		= hfsc_dump_class,
	.dump_stats	= hfsc_dump_class_stats,
	.walk		= hfsc_walk
};

static struct Qdisc_ops hfsc_qdisc_ops = {
	.id		= "hfsc",
	.init		= hfsc_init_qdisc,
	.change		= hfsc_change_qdisc,
	.reset		= hfsc_reset_qdisc,
	.destroy	= hfsc_destroy_qdisc,
	.dump		= hfsc_dump_qdisc,
	.enqueue	= hfsc_enqueue,
	.dequeue	= hfsc_dequeue,
	.requeue	= hfsc_requeue,
	.drop		= hfsc_drop,
	.cl_ops		= &hfsc_class_ops,
	.priv_size	= sizeof(struct hfsc_sched),
	.owner		= THIS_MODULE
};

static int __init
hfsc_init(void)
{
	return register_qdisc(&hfsc_qdisc_ops);
}

static void __exit
hfsc_cleanup(void)
{
	unregister_qdisc(&hfsc_qdisc_ops);
}

MODULE_LICENSE("GPL");
module_init(hfsc_init);
module_exit(hfsc_cleanup);
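/*
 * Illustrative configuration from userspace (iproute2 tc; the exact
 * option syntax is defined by tc-hfsc(8), not by this file):
 *
 *	tc qdisc add dev eth0 root handle 1: hfsc default 10
 *	tc class add dev eth0 parent 1: classid 1:10 hfsc \
 *		sc rate 1mbit ul rate 10mbit
 *
 * This gives class 1:10 a 1 Mbit/s service curve capped by a 10 Mbit/s
 * upper limit; the curves arrive here as TCA_HFSC_RSC/FSC/USC
 * attributes and are applied in hfsc_change_class() above.
 */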