// SPDX-License-Identifier: GPL-2.0
/*
 * Data Access Monitor
 *
 * Author: SeongJae Park <sjpark@amazon.de>
 */

#define pr_fmt(fmt) "damon: " fmt

#include <linux/damon.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>

#define CREATE_TRACE_POINTS
#include <trace/events/damon.h>

#ifdef CONFIG_DAMON_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif

static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;

/*
 * Construct a damon_region struct
 *
 * Returns the pointer to the new struct if success, or NULL otherwise
 */
struct damon_region *damon_new_region(unsigned long start, unsigned long end)
{
	struct damon_region *region;

	region = kmalloc(sizeof(*region), GFP_KERNEL);
	if (!region)
		return NULL;

	region->ar.start = start;
	region->ar.end = end;
	region->nr_accesses = 0;
	INIT_LIST_HEAD(&region->list);

	region->age = 0;
	region->last_nr_accesses = 0;

	return region;
}

void damon_add_region(struct damon_region *r, struct damon_target *t)
{
	list_add_tail(&r->list, &t->regions_list);
	t->nr_regions++;
}

static void damon_del_region(struct damon_region *r, struct damon_target *t)
{
	list_del(&r->list);
	t->nr_regions--;
}

static void damon_free_region(struct damon_region *r)
{
	kfree(r);
}

void damon_destroy_region(struct damon_region *r, struct damon_target *t)
{
	damon_del_region(r, t);
	damon_free_region(r);
}

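/*
 * Illustrative note for damon_new_scheme() below (a hypothetical caller, not
 * required by this API): passing a zeroed 'struct damos_quota' and a
 * 'struct damos_watermarks' whose ->metric is DAMOS_WMARK_NONE yields a
 * scheme that is never throttled by the quota logic and is always treated as
 * activated by the watermark checks.
 */
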
struct damos *damon_new_scheme(
		unsigned long min_sz_region, unsigned long max_sz_region,
		unsigned int min_nr_accesses, unsigned int max_nr_accesses,
		unsigned int min_age_region, unsigned int max_age_region,
		enum damos_action action, struct damos_quota *quota,
		struct damos_watermarks *wmarks)
{
	struct damos *scheme;

	scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
	if (!scheme)
		return NULL;
	scheme->min_sz_region = min_sz_region;
	scheme->max_sz_region = max_sz_region;
	scheme->min_nr_accesses = min_nr_accesses;
	scheme->max_nr_accesses = max_nr_accesses;
	scheme->min_age_region = min_age_region;
	scheme->max_age_region = max_age_region;
	scheme->action = action;
	scheme->stat = (struct damos_stat){};
	INIT_LIST_HEAD(&scheme->list);

	scheme->quota.ms = quota->ms;
	scheme->quota.sz = quota->sz;
	scheme->quota.reset_interval = quota->reset_interval;
	scheme->quota.weight_sz = quota->weight_sz;
	scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
	scheme->quota.weight_age = quota->weight_age;
	scheme->quota.total_charged_sz = 0;
	scheme->quota.total_charged_ns = 0;
	scheme->quota.esz = 0;
	scheme->quota.charged_sz = 0;
	scheme->quota.charged_from = 0;
	scheme->quota.charge_target_from = NULL;
	scheme->quota.charge_addr_from = 0;

	scheme->wmarks.metric = wmarks->metric;
	scheme->wmarks.interval = wmarks->interval;
	scheme->wmarks.high = wmarks->high;
	scheme->wmarks.mid = wmarks->mid;
	scheme->wmarks.low = wmarks->low;
	scheme->wmarks.activated = true;

	return scheme;
}

void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
{
	list_add_tail(&s->list, &ctx->schemes);
}

static void damon_del_scheme(struct damos *s)
{
	list_del(&s->list);
}

static void damon_free_scheme(struct damos *s)
{
	kfree(s);
}

void damon_destroy_scheme(struct damos *s)
{
	damon_del_scheme(s);
	damon_free_scheme(s);
}

/*
 * Construct a damon_target struct
 *
 * Returns the pointer to the new struct if success, or NULL otherwise
 */
struct damon_target *damon_new_target(unsigned long id)
{
	struct damon_target *t;

	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return NULL;

	t->id = id;
	t->nr_regions = 0;
	INIT_LIST_HEAD(&t->regions_list);

	return t;
}

void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
{
	list_add_tail(&t->list, &ctx->adaptive_targets);
}

bool damon_targets_empty(struct damon_ctx *ctx)
{
	return list_empty(&ctx->adaptive_targets);
}

static void damon_del_target(struct damon_target *t)
{
	list_del(&t->list);
}

void damon_free_target(struct damon_target *t)
{
	struct damon_region *r, *next;

	damon_for_each_region_safe(r, next, t)
		damon_free_region(r);
	kfree(t);
}

void damon_destroy_target(struct damon_target *t)
{
	damon_del_target(t);
	damon_free_target(t);
}

unsigned int damon_nr_regions(struct damon_target *t)
{
	return t->nr_regions;
}

struct damon_ctx *damon_new_ctx(void)
{
	struct damon_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	ctx->sample_interval = 5 * 1000;
	ctx->aggr_interval = 100 * 1000;
	ctx->primitive_update_interval = 60 * 1000 * 1000;

	ktime_get_coarse_ts64(&ctx->last_aggregation);
	ctx->last_primitive_update = ctx->last_aggregation;

	mutex_init(&ctx->kdamond_lock);

	ctx->min_nr_regions = 10;
	ctx->max_nr_regions = 1000;

	INIT_LIST_HEAD(&ctx->adaptive_targets);
	INIT_LIST_HEAD(&ctx->schemes);

	return ctx;
}

static void damon_destroy_targets(struct damon_ctx *ctx)
{
	struct damon_target *t, *next_t;

	if (ctx->primitive.cleanup) {
		ctx->primitive.cleanup(ctx);
		return;
	}

	damon_for_each_target_safe(t, next_t, ctx)
		damon_destroy_target(t);
}

void damon_destroy_ctx(struct damon_ctx *ctx)
{
	struct damos *s, *next_s;

	damon_destroy_targets(ctx);

	damon_for_each_scheme_safe(s, next_s, ctx)
		damon_destroy_scheme(s);

	kfree(ctx);
}

/**
 * damon_set_targets() - Set monitoring targets.
 * @ctx:	monitoring context
 * @ids:	array of target ids
 * @nr_ids:	number of entries in @ids
 *
 * This function should not be called while the kdamond is running.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_targets(struct damon_ctx *ctx,
		      unsigned long *ids, ssize_t nr_ids)
{
	ssize_t i;
	struct damon_target *t, *next;

	damon_destroy_targets(ctx);

	for (i = 0; i < nr_ids; i++) {
		t = damon_new_target(ids[i]);
		if (!t) {
			/* The caller should do cleanup of the ids itself */
			damon_for_each_target_safe(t, next, ctx)
				damon_destroy_target(t);
			return -ENOMEM;
		}
		damon_add_target(ctx, t);
	}

	return 0;
}

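/*
 * Illustrative usage sketch (hypothetical caller; the target id and the
 * attribute values below are arbitrary examples, error handling is omitted,
 * and a monitoring primitive is assumed to have been installed on the
 * context already):
 *
 *	struct damon_ctx *ctx = damon_new_ctx();
 *	unsigned long target_id = 42;
 *
 *	damon_set_targets(ctx, &target_id, 1);
 *	damon_set_attrs(ctx, 5000, 100000, 60 * 1000 * 1000, 10, 1000);
 *	damon_start(&ctx, 1);
 */
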
/**
 * damon_set_attrs() - Set attributes for the monitoring.
 * @ctx:		monitoring context
 * @sample_int:		time interval between samplings
 * @aggr_int:		time interval between aggregations
 * @primitive_upd_int:	time interval between monitoring primitive updates
 * @min_nr_reg:		minimal number of regions
 * @max_nr_reg:		maximum number of regions
 *
 * This function should not be called while the kdamond is running.
 * Every time interval is in micro-seconds.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
		    unsigned long aggr_int, unsigned long primitive_upd_int,
		    unsigned long min_nr_reg, unsigned long max_nr_reg)
{
	if (min_nr_reg < 3)
		return -EINVAL;
	if (min_nr_reg > max_nr_reg)
		return -EINVAL;

	ctx->sample_interval = sample_int;
	ctx->aggr_interval = aggr_int;
	ctx->primitive_update_interval = primitive_upd_int;
	ctx->min_nr_regions = min_nr_reg;
	ctx->max_nr_regions = max_nr_reg;

	return 0;
}

/**
 * damon_set_schemes() - Set data access monitoring based operation schemes.
 * @ctx:	monitoring context
 * @schemes:	array of the schemes
 * @nr_schemes:	number of entries in @schemes
 *
 * This function should not be called while the kdamond of the context is
 * running.
 *
 * Return: 0 if success, or negative error code otherwise.
 */
int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
		      ssize_t nr_schemes)
{
	struct damos *s, *next;
	ssize_t i;

	damon_for_each_scheme_safe(s, next, ctx)
		damon_destroy_scheme(s);
	for (i = 0; i < nr_schemes; i++)
		damon_add_scheme(ctx, schemes[i]);
	return 0;
}

/**
 * damon_nr_running_ctxs() - Return number of currently running contexts.
 */
int damon_nr_running_ctxs(void)
{
	int nr_ctxs;

	mutex_lock(&damon_lock);
	nr_ctxs = nr_running_ctxs;
	mutex_unlock(&damon_lock);

	return nr_ctxs;
}

/* Returns the size upper limit for each monitoring region */
static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct damon_region *r;
	unsigned long sz = 0;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t)
			sz += r->ar.end - r->ar.start;
	}

	if (ctx->min_nr_regions)
		sz /= ctx->min_nr_regions;
	if (sz < DAMON_MIN_REGION)
		sz = DAMON_MIN_REGION;

	return sz;
}

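/*
 * Worked example for damon_region_sz_limit() above (the numbers are
 * illustrative only): if the monitoring targets cover 1 GiB of address space
 * in total and min_nr_regions is 10, each region is capped at roughly
 * 100 MiB, so merging cannot collapse the targets into fewer than about ten
 * regions.
 */
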
static int kdamond_fn(void *data);

/*
 * __damon_start() - Starts monitoring with given context.
 * @ctx:	monitoring context
 *
 * This function should be called while damon_lock is held.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_start(struct damon_ctx *ctx)
{
	int err = -EBUSY;

	mutex_lock(&ctx->kdamond_lock);
	if (!ctx->kdamond) {
		err = 0;
		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
				nr_running_ctxs);
		if (IS_ERR(ctx->kdamond)) {
			err = PTR_ERR(ctx->kdamond);
			ctx->kdamond = NULL;
		}
	}
	mutex_unlock(&ctx->kdamond_lock);

	return err;
}

/**
 * damon_start() - Starts monitoring for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to start monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * This function starts a group of monitoring threads for a group of
 * monitoring contexts.  One thread per context is created and run in
 * parallel.  The caller should handle synchronization between the threads by
 * itself.  If a group of threads created by another 'damon_start()' call is
 * currently running, this function does nothing but returns -EBUSY.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i;
	int err = 0;

	mutex_lock(&damon_lock);
	if (nr_running_ctxs) {
		mutex_unlock(&damon_lock);
		return -EBUSY;
	}

	for (i = 0; i < nr_ctxs; i++) {
		err = __damon_start(ctxs[i]);
		if (err)
			break;
		nr_running_ctxs++;
	}
	mutex_unlock(&damon_lock);

	return err;
}

/*
 * __damon_stop() - Stops monitoring of given context.
 * @ctx:	monitoring context
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_stop(struct damon_ctx *ctx)
{
	struct task_struct *tsk;

	mutex_lock(&ctx->kdamond_lock);
	tsk = ctx->kdamond;
	if (tsk) {
		get_task_struct(tsk);
		mutex_unlock(&ctx->kdamond_lock);
		kthread_stop(tsk);
		put_task_struct(tsk);
		return 0;
	}
	mutex_unlock(&ctx->kdamond_lock);

	return -EPERM;
}

/**
 * damon_stop() - Stops monitoring for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to stop monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i, err = 0;

	for (i = 0; i < nr_ctxs; i++) {
		/* nr_running_ctxs is decremented in kdamond_fn */
		err = __damon_stop(ctxs[i]);
		if (err)
			return err;
	}

	return err;
}

/*
 * damon_check_reset_time_interval() - Check if a time interval is elapsed.
 * @baseline:	the time to check whether the interval has elapsed since
 * @interval:	the time interval (microseconds)
 *
 * See whether the given time interval has passed since the given baseline
 * time.  If so, it also updates the baseline to current time for next check.
 *
 * Return:	true if the time interval has passed, or false otherwise.
 */
static bool damon_check_reset_time_interval(struct timespec64 *baseline,
		unsigned long interval)
{
	struct timespec64 now;

	ktime_get_coarse_ts64(&now);
	if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
			interval * 1000)
		return false;
	*baseline = now;
	return true;
}

/*
 * Check whether it is time to flush the aggregated information
 */
static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_aggregation,
			ctx->aggr_interval);
}

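/*
 * Unit note (illustrative, based on the defaults set in damon_new_ctx()): all
 * monitoring intervals are in microseconds, so the default sample,
 * aggregation and primitive update intervals of 5 * 1000, 100 * 1000 and
 * 60 * 1000 * 1000 correspond to 5 ms, 100 ms and 60 s respectively;
 * damon_check_reset_time_interval() therefore compares the elapsed time
 * against interval * 1000 nanoseconds.
 */
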
/*
 * Reset the aggregated monitoring results ('nr_accesses' of each region).
 */
static void kdamond_reset_aggregated(struct damon_ctx *c)
{
	struct damon_target *t;
	unsigned int ti = 0;	/* target's index */

	damon_for_each_target(t, c) {
		struct damon_region *r;

		damon_for_each_region(r, t) {
			trace_damon_aggregated(t, ti, r, damon_nr_regions(t));
			r->last_nr_accesses = r->nr_accesses;
			r->nr_accesses = 0;
		}
		ti++;
	}
}

static void damon_split_region_at(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		unsigned long sz_r);

static bool __damos_valid_target(struct damon_region *r, struct damos *s)
{
	unsigned long sz;

	sz = r->ar.end - r->ar.start;
	return s->min_sz_region <= sz && sz <= s->max_sz_region &&
		s->min_nr_accesses <= r->nr_accesses &&
		r->nr_accesses <= s->max_nr_accesses &&
		s->min_age_region <= r->age && r->age <= s->max_age_region;
}

static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
		struct damon_region *r, struct damos *s)
{
	bool ret = __damos_valid_target(r, s);

	if (!ret || !s->quota.esz || !c->primitive.get_scheme_score)
		return ret;

	return c->primitive.get_scheme_score(c, t, r, s) >= s->quota.min_score;
}

static void damon_do_apply_schemes(struct damon_ctx *c,
				   struct damon_target *t,
				   struct damon_region *r)
{
	struct damos *s;

	damon_for_each_scheme(s, c) {
		struct damos_quota *quota = &s->quota;
		unsigned long sz = r->ar.end - r->ar.start;
		struct timespec64 begin, end;
		unsigned long sz_applied = 0;

		if (!s->wmarks.activated)
			continue;

		/* Check the quota */
		if (quota->esz && quota->charged_sz >= quota->esz)
			continue;

		/* Skip previously charged regions */
		if (quota->charge_target_from) {
			if (t != quota->charge_target_from)
				continue;
			if (r == damon_last_region(t)) {
				quota->charge_target_from = NULL;
				quota->charge_addr_from = 0;
				continue;
			}
			if (quota->charge_addr_from &&
					r->ar.end <= quota->charge_addr_from)
				continue;

			if (quota->charge_addr_from && r->ar.start <
					quota->charge_addr_from) {
				sz = ALIGN_DOWN(quota->charge_addr_from -
						r->ar.start, DAMON_MIN_REGION);
				if (!sz) {
					if (r->ar.end - r->ar.start <=
							DAMON_MIN_REGION)
						continue;
					sz = DAMON_MIN_REGION;
				}
				damon_split_region_at(c, t, r, sz);
				r = damon_next_region(r);
				sz = r->ar.end - r->ar.start;
			}
			quota->charge_target_from = NULL;
			quota->charge_addr_from = 0;
		}

		if (!damos_valid_target(c, t, r, s))
			continue;

		/* Apply the scheme */
		if (c->primitive.apply_scheme) {
			if (quota->esz &&
					quota->charged_sz + sz > quota->esz) {
				sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
						DAMON_MIN_REGION);
				if (!sz)
					goto update_stat;
				damon_split_region_at(c, t, r, sz);
			}
			ktime_get_coarse_ts64(&begin);
			sz_applied = c->primitive.apply_scheme(c, t, r, s);
			ktime_get_coarse_ts64(&end);
			quota->total_charged_ns += timespec64_to_ns(&end) -
				timespec64_to_ns(&begin);
			quota->charged_sz += sz;
			if (quota->esz && quota->charged_sz >= quota->esz) {
				quota->charge_target_from = t;
				quota->charge_addr_from = r->ar.end + 1;
			}
		}
		if (s->action != DAMOS_STAT)
			r->age = 0;

update_stat:
		s->stat.nr_tried++;
		s->stat.sz_tried += sz;
		if (sz_applied)
			s->stat.nr_applied++;
		s->stat.sz_applied += sz_applied;
	}
}

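/*
 * Worked example for damos_set_effective_quota() below (the numbers are
 * illustrative only): if 4 MiB have been charged over 2 ms of cumulated apply
 * time, the estimated throughput is 4 MiB * 1000000 / 2000000 ns, i.e. 2 MiB
 * per millisecond (about 2 GiB/s), so a 10 ms time quota yields an effective
 * size quota (esz) of about 20 MiB, further capped by quota->sz if that is
 * nonzero and smaller.
 */
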
/* Shouldn't be called if quota->ms and quota->sz are zero */
static void damos_set_effective_quota(struct damos_quota *quota)
{
	unsigned long throughput;
	unsigned long esz;

	if (!quota->ms) {
		quota->esz = quota->sz;
		return;
	}

	if (quota->total_charged_ns)
		throughput = quota->total_charged_sz * 1000000 /
			quota->total_charged_ns;
	else
		throughput = PAGE_SIZE * 1024;
	esz = throughput * quota->ms;

	if (quota->sz && quota->sz < esz)
		esz = quota->sz;
	quota->esz = esz;
}

static void kdamond_apply_schemes(struct damon_ctx *c)
{
	struct damon_target *t;
	struct damon_region *r, *next_r;
	struct damos *s;

	damon_for_each_scheme(s, c) {
		struct damos_quota *quota = &s->quota;
		unsigned long cumulated_sz;
		unsigned int score, max_score = 0;

		if (!s->wmarks.activated)
			continue;

		if (!quota->ms && !quota->sz)
			continue;

		/* New charge window starts */
		if (time_after_eq(jiffies, quota->charged_from +
					msecs_to_jiffies(
						quota->reset_interval))) {
			if (quota->esz && quota->charged_sz >= quota->esz)
				s->stat.qt_exceeds++;
			quota->total_charged_sz += quota->charged_sz;
			quota->charged_from = jiffies;
			quota->charged_sz = 0;
			damos_set_effective_quota(quota);
		}

		if (!c->primitive.get_scheme_score)
			continue;

		/* Fill up the score histogram */
		memset(quota->histogram, 0, sizeof(quota->histogram));
		damon_for_each_target(t, c) {
			damon_for_each_region(r, t) {
				if (!__damos_valid_target(r, s))
					continue;
				score = c->primitive.get_scheme_score(
						c, t, r, s);
				quota->histogram[score] +=
					r->ar.end - r->ar.start;
				if (score > max_score)
					max_score = score;
			}
		}

		/* Set the min score limit */
		for (cumulated_sz = 0, score = max_score; ; score--) {
			cumulated_sz += quota->histogram[score];
			if (cumulated_sz >= quota->esz || !score)
				break;
		}
		quota->min_score = score;
	}

	damon_for_each_target(t, c) {
		damon_for_each_region_safe(r, next_r, t)
			damon_do_apply_schemes(c, t, r);
	}
}

static inline unsigned long sz_damon_region(struct damon_region *r)
{
	return r->ar.end - r->ar.start;
}

/*
 * Merge two adjacent regions into one region
 */
static void damon_merge_two_regions(struct damon_target *t,
		struct damon_region *l, struct damon_region *r)
{
	unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);

	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
			(sz_l + sz_r);
	l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
	l->ar.end = r->ar.end;
	damon_destroy_region(r, t);
}

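/*
 * Worked example for the size-weighted merge above (the numbers are
 * illustrative only): merging an 8 KiB region with nr_accesses of 10 into an
 * adjacent 24 KiB region with nr_accesses of 2 produces a 32 KiB region with
 * nr_accesses of (10 * 8 + 2 * 24) / 32 = 4; the age is averaged the same
 * way.
 */
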
/*
 * Merge adjacent regions having similar access frequencies
 *
 * t		target affected by this merge operation
 * thres	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 */
static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
				   unsigned long sz_limit)
{
	struct damon_region *r, *prev = NULL, *next;

	damon_for_each_region_safe(r, next, t) {
		if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
			r->age = 0;
		else
			r->age++;

		if (prev && prev->ar.end == r->ar.start &&
		    abs(prev->nr_accesses - r->nr_accesses) <= thres &&
		    sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
			damon_merge_two_regions(t, prev, r);
		else
			prev = r;
	}
}

/*
 * Merge adjacent regions having similar access frequencies
 *
 * threshold	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 *
 * This function merges monitoring target regions which are adjacent and their
 * access frequencies are similar.  This is for minimizing the monitoring
 * overhead under the dynamically changeable access pattern.  If a merge was
 * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 */
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
				  unsigned long sz_limit)
{
	struct damon_target *t;

	damon_for_each_target(t, c)
		damon_merge_regions_of(t, threshold, sz_limit);
}

/*
 * Split a region in two
 *
 * r		the region to be split
 * sz_r		size of the first sub-region that will be made
 */
static void damon_split_region_at(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		unsigned long sz_r)
{
	struct damon_region *new;

	new = damon_new_region(r->ar.start + sz_r, r->ar.end);
	if (!new)
		return;

	r->ar.end = new->ar.start;

	new->age = r->age;
	new->last_nr_accesses = r->last_nr_accesses;

	damon_insert_region(new, r, damon_next_region(r), t);
}

/* Split every region in the given target into 'nr_subs' regions */
static void damon_split_regions_of(struct damon_ctx *ctx,
				   struct damon_target *t, int nr_subs)
{
	struct damon_region *r, *next;
	unsigned long sz_region, sz_sub = 0;
	int i;

	damon_for_each_region_safe(r, next, t) {
		sz_region = r->ar.end - r->ar.start;

		for (i = 0; i < nr_subs - 1 &&
				sz_region > 2 * DAMON_MIN_REGION; i++) {
			/*
			 * Randomly select size of left sub-region to be at
			 * least 10 percent and at most 90% of original region
			 */
			sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
					sz_region / 10, DAMON_MIN_REGION);
			/* Do not allow blank region */
			if (sz_sub == 0 || sz_sub >= sz_region)
				continue;

			damon_split_region_at(ctx, t, r, sz_sub);
			sz_region = sz_sub;
		}
	}
}

/*
 * Split every target region into randomly-sized small regions
 *
 * This function splits every target region into random-sized small regions if
 * current total number of the regions is equal or smaller than half of the
 * user-specified maximum number of regions.  This is for maximizing the
 * monitoring accuracy under the dynamically changeable access patterns.  If a
 * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 * it.
 */
static void kdamond_split_regions(struct damon_ctx *ctx)
{
	struct damon_target *t;
	unsigned int nr_regions = 0;
	static unsigned int last_nr_regions;
	int nr_subregions = 2;

	damon_for_each_target(t, ctx)
		nr_regions += damon_nr_regions(t);

	if (nr_regions > ctx->max_nr_regions / 2)
		return;

	/* Maybe the middle of the region has different access frequency */
	if (last_nr_regions == nr_regions &&
			nr_regions < ctx->max_nr_regions / 3)
		nr_subregions = 3;

	damon_for_each_target(t, ctx)
		damon_split_regions_of(ctx, t, nr_subregions);

	last_nr_regions = nr_regions;
}

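/*
 * Worked example for the splitting above (the numbers are illustrative only):
 * with max_nr_regions of 1000, splitting is attempted only while the total
 * number of regions is at most 500.  For a 100 MiB region, a damon_rand(1, 10)
 * result of 3 makes the left sub-region about 30 MiB (aligned down to
 * DAMON_MIN_REGION) and leaves about 70 MiB in the right one.
 */
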
/*
 * Check whether it is time to check and apply the target monitoring regions
 *
 * Returns true if it is.
 */
static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_primitive_update,
			ctx->primitive_update_interval);
}

/*
 * Check whether current monitoring should be stopped
 *
 * The monitoring is stopped when either the user requested to stop, or all
 * monitoring targets are invalid.
 *
 * Returns true if need to stop current monitoring.
 */
static bool kdamond_need_stop(struct damon_ctx *ctx)
{
	struct damon_target *t;

	if (kthread_should_stop())
		return true;

	if (!ctx->primitive.target_valid)
		return false;

	damon_for_each_target(t, ctx) {
		if (ctx->primitive.target_valid(t))
			return false;
	}

	return true;
}

static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
{
	struct sysinfo i;

	switch (metric) {
	case DAMOS_WMARK_FREE_MEM_RATE:
		si_meminfo(&i);
		return i.freeram * 1000 / i.totalram;
	default:
		break;
	}
	return -EINVAL;
}

/*
 * Returns zero if the scheme is active.  Else, returns time to wait for next
 * watermark check in micro-seconds.
 */
static unsigned long damos_wmark_wait_us(struct damos *scheme)
{
	unsigned long metric;

	if (scheme->wmarks.metric == DAMOS_WMARK_NONE)
		return 0;

	metric = damos_wmark_metric_value(scheme->wmarks.metric);
	/* higher than high watermark or lower than low watermark */
	if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
		if (scheme->wmarks.activated)
			pr_debug("deactivate a scheme (%d) for %s wmark\n",
					scheme->action,
					metric > scheme->wmarks.high ?
					"high" : "low");
		scheme->wmarks.activated = false;
		return scheme->wmarks.interval;
	}

	/* inactive and higher than middle watermark */
	if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
			!scheme->wmarks.activated)
		return scheme->wmarks.interval;

	if (!scheme->wmarks.activated)
		pr_debug("activate a scheme (%d)\n", scheme->action);
	scheme->wmarks.activated = true;
	return 0;
}

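/*
 * Illustrative example for the watermark check above (the threshold values
 * are made up): with DAMOS_WMARK_FREE_MEM_RATE and high/mid/low watermarks of
 * 500/400/200 per-thousand, an inactive scheme gets activated once the free
 * memory rate drops below 40% (while staying above 20%), and gets deactivated
 * again when the rate rises above 50% or falls below 20%.
 */
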
static void kdamond_usleep(unsigned long usecs)
{
	/* See Documentation/timers/timers-howto.rst for the thresholds */
	if (usecs > 20 * USEC_PER_MSEC)
		schedule_timeout_idle(usecs_to_jiffies(usecs));
	else
		usleep_idle_range(usecs, usecs + 1);
}

/* Returns negative error code if it's not activated but should return */
static int kdamond_wait_activation(struct damon_ctx *ctx)
{
	struct damos *s;
	unsigned long wait_time;
	unsigned long min_wait_time = 0;

	while (!kdamond_need_stop(ctx)) {
		damon_for_each_scheme(s, ctx) {
			wait_time = damos_wmark_wait_us(s);
			if (!min_wait_time || wait_time < min_wait_time)
				min_wait_time = wait_time;
		}
		if (!min_wait_time)
			return 0;

		kdamond_usleep(min_wait_time);
	}
	return -EBUSY;
}

/*
 * The monitoring daemon that runs as a kernel thread
 */
static int kdamond_fn(void *data)
{
	struct damon_ctx *ctx = (struct damon_ctx *)data;
	struct damon_target *t;
	struct damon_region *r, *next;
	unsigned int max_nr_accesses = 0;
	unsigned long sz_limit = 0;
	bool done = false;

	pr_debug("kdamond (%d) starts\n", current->pid);

	if (ctx->primitive.init)
		ctx->primitive.init(ctx);
	if (ctx->callback.before_start && ctx->callback.before_start(ctx))
		done = true;

	sz_limit = damon_region_sz_limit(ctx);

	while (!kdamond_need_stop(ctx) && !done) {
		if (kdamond_wait_activation(ctx))
			continue;

		if (ctx->primitive.prepare_access_checks)
			ctx->primitive.prepare_access_checks(ctx);
		if (ctx->callback.after_sampling &&
				ctx->callback.after_sampling(ctx))
			done = true;

		kdamond_usleep(ctx->sample_interval);

		if (ctx->primitive.check_accesses)
			max_nr_accesses = ctx->primitive.check_accesses(ctx);

		if (kdamond_aggregate_interval_passed(ctx)) {
			kdamond_merge_regions(ctx,
					max_nr_accesses / 10,
					sz_limit);
			if (ctx->callback.after_aggregation &&
					ctx->callback.after_aggregation(ctx))
				done = true;
			kdamond_apply_schemes(ctx);
			kdamond_reset_aggregated(ctx);
			kdamond_split_regions(ctx);
			if (ctx->primitive.reset_aggregated)
				ctx->primitive.reset_aggregated(ctx);
		}

		if (kdamond_need_update_primitive(ctx)) {
			if (ctx->primitive.update)
				ctx->primitive.update(ctx);
			sz_limit = damon_region_sz_limit(ctx);
		}
	}
	damon_for_each_target(t, ctx) {
		damon_for_each_region_safe(r, next, t)
			damon_destroy_region(r, t);
	}

	if (ctx->callback.before_terminate)
		ctx->callback.before_terminate(ctx);
	if (ctx->primitive.cleanup)
		ctx->primitive.cleanup(ctx);

	pr_debug("kdamond (%d) finishes\n", current->pid);
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond = NULL;
	mutex_unlock(&ctx->kdamond_lock);

	mutex_lock(&damon_lock);
	nr_running_ctxs--;
	mutex_unlock(&damon_lock);

	return 0;
}

#include "core-test.h"