// SPDX-License-Identifier: GPL-2.0
/*
 * Data Access Monitor
 *
 * Author: SeongJae Park <sjpark@amazon.de>
 */

#define pr_fmt(fmt) "damon: " fmt

#include <linux/damon.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>

#define CREATE_TRACE_POINTS
#include <trace/events/damon.h>

#ifdef CONFIG_DAMON_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif

static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;
static bool running_exclusive_ctxs;

static DEFINE_MUTEX(damon_ops_lock);
static struct damon_operations damon_registered_ops[NR_DAMON_OPS];

/* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */
static bool damon_registered_ops_id(enum damon_ops_id id)
{
        struct damon_operations empty_ops = {};

        if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops)))
                return false;
        return true;
}

/**
 * damon_register_ops() - Register a monitoring operations set to DAMON.
 * @ops: monitoring operations set to register.
 *
 * This function registers a monitoring operations set having a valid &struct
 * damon_operations->id so that others can find and use it later.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_register_ops(struct damon_operations *ops)
{
        int err = 0;

        if (ops->id >= NR_DAMON_OPS)
                return -EINVAL;
        mutex_lock(&damon_ops_lock);
        /* Fail for already registered ops */
        if (damon_registered_ops_id(ops->id)) {
                err = -EINVAL;
                goto out;
        }
        damon_registered_ops[ops->id] = *ops;
out:
        mutex_unlock(&damon_ops_lock);
        return err;
}
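/*
 * Illustrative sketch (not part of this file): a monitoring operations set
 * could be registered from its own init code roughly as follows.  The
 * example_* callback names are placeholders, and DAMON_OPS_VADDR is used
 * here only as an example id; real operations sets live in separate source
 * files.  Only the callbacks that this core file actually invokes through
 * ctx->ops are shown.
 *
 *        static struct damon_operations example_ops = {
 *                .id = DAMON_OPS_VADDR,
 *                .init = example_init,
 *                .update = example_update,
 *                .prepare_access_checks = example_prepare_access_checks,
 *                .check_accesses = example_check_accesses,
 *                .target_valid = example_target_valid,
 *                .cleanup = example_cleanup,
 *        };
 *
 *        static int __init example_ops_init(void)
 *        {
 *                return damon_register_ops(&example_ops);
 *        }
 */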
/**
 * damon_select_ops() - Select a monitoring operations set to use with the
 * context.
 * @ctx: monitoring context to use the operations.
 * @id: id of the registered monitoring operations to select.
 *
 * This function finds the registered monitoring operations set of @id and
 * makes @ctx use it.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id)
{
        int err = 0;

        if (id >= NR_DAMON_OPS)
                return -EINVAL;

        mutex_lock(&damon_ops_lock);
        if (!damon_registered_ops_id(id))
                err = -EINVAL;
        else
                ctx->ops = damon_registered_ops[id];
        mutex_unlock(&damon_ops_lock);
        return err;
}

/*
 * Construct a damon_region struct
 *
 * Returns the pointer to the new struct on success, or NULL otherwise
 */
struct damon_region *damon_new_region(unsigned long start, unsigned long end)
{
        struct damon_region *region;

        region = kmalloc(sizeof(*region), GFP_KERNEL);
        if (!region)
                return NULL;

        region->ar.start = start;
        region->ar.end = end;
        region->nr_accesses = 0;
        INIT_LIST_HEAD(&region->list);

        region->age = 0;
        region->last_nr_accesses = 0;

        return region;
}

void damon_add_region(struct damon_region *r, struct damon_target *t)
{
        list_add_tail(&r->list, &t->regions_list);
        t->nr_regions++;
}

static void damon_del_region(struct damon_region *r, struct damon_target *t)
{
        list_del(&r->list);
        t->nr_regions--;
}

static void damon_free_region(struct damon_region *r)
{
        kfree(r);
}

void damon_destroy_region(struct damon_region *r, struct damon_target *t)
{
        damon_del_region(r, t);
        damon_free_region(r);
}

struct damos *damon_new_scheme(
                unsigned long min_sz_region, unsigned long max_sz_region,
                unsigned int min_nr_accesses, unsigned int max_nr_accesses,
                unsigned int min_age_region, unsigned int max_age_region,
                enum damos_action action, struct damos_quota *quota,
                struct damos_watermarks *wmarks)
{
        struct damos *scheme;

        scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
        if (!scheme)
                return NULL;
        scheme->min_sz_region = min_sz_region;
        scheme->max_sz_region = max_sz_region;
        scheme->min_nr_accesses = min_nr_accesses;
        scheme->max_nr_accesses = max_nr_accesses;
        scheme->min_age_region = min_age_region;
        scheme->max_age_region = max_age_region;
        scheme->action = action;
        scheme->stat = (struct damos_stat){};
        INIT_LIST_HEAD(&scheme->list);

        scheme->quota.ms = quota->ms;
        scheme->quota.sz = quota->sz;
        scheme->quota.reset_interval = quota->reset_interval;
        scheme->quota.weight_sz = quota->weight_sz;
        scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
        scheme->quota.weight_age = quota->weight_age;
        scheme->quota.total_charged_sz = 0;
        scheme->quota.total_charged_ns = 0;
        scheme->quota.esz = 0;
        scheme->quota.charged_sz = 0;
        scheme->quota.charged_from = 0;
        scheme->quota.charge_target_from = NULL;
        scheme->quota.charge_addr_from = 0;

        scheme->wmarks.metric = wmarks->metric;
        scheme->wmarks.interval = wmarks->interval;
        scheme->wmarks.high = wmarks->high;
        scheme->wmarks.mid = wmarks->mid;
        scheme->wmarks.low = wmarks->low;
        scheme->wmarks.activated = true;

        return scheme;
}

void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
{
        list_add_tail(&s->list, &ctx->schemes);
}

static void damon_del_scheme(struct damos *s)
{
        list_del(&s->list);
}

static void damon_free_scheme(struct damos *s)
{
        kfree(s);
}

void damon_destroy_scheme(struct damos *s)
{
        damon_del_scheme(s);
        damon_free_scheme(s);
}
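/*
 * Illustrative sketch (not part of this file): building a statistics-only
 * scheme for regions of at least one page that have not been accessed for at
 * least five aggregation intervals, with quotas and watermarks effectively
 * disabled.  'ctx' is an assumed, already prepared struct damon_ctx, and the
 * numeric bounds are arbitrary example values.
 *
 *        struct damos_quota quota = {};        zeroed ms/sz mean no quota
 *        struct damos_watermarks wmarks = {
 *                .metric = DAMOS_WMARK_NONE,   watermarks disabled
 *        };
 *        struct damos *s;
 *
 *        s = damon_new_scheme(PAGE_SIZE, ULONG_MAX, 0, 0, 5, UINT_MAX,
 *                        DAMOS_STAT, &quota, &wmarks);
 *        if (!s)
 *                return -ENOMEM;
 *        damon_add_scheme(ctx, s);
 */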
/*
 * Construct a damon_target struct
 *
 * Returns the pointer to the new struct on success, or NULL otherwise
 */
struct damon_target *damon_new_target(void)
{
        struct damon_target *t;

        t = kmalloc(sizeof(*t), GFP_KERNEL);
        if (!t)
                return NULL;

        t->pid = NULL;
        t->nr_regions = 0;
        INIT_LIST_HEAD(&t->regions_list);

        return t;
}

void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
{
        list_add_tail(&t->list, &ctx->adaptive_targets);
}

bool damon_targets_empty(struct damon_ctx *ctx)
{
        return list_empty(&ctx->adaptive_targets);
}

static void damon_del_target(struct damon_target *t)
{
        list_del(&t->list);
}

void damon_free_target(struct damon_target *t)
{
        struct damon_region *r, *next;

        damon_for_each_region_safe(r, next, t)
                damon_free_region(r);
        kfree(t);
}

void damon_destroy_target(struct damon_target *t)
{
        damon_del_target(t);
        damon_free_target(t);
}

unsigned int damon_nr_regions(struct damon_target *t)
{
        return t->nr_regions;
}

struct damon_ctx *damon_new_ctx(void)
{
        struct damon_ctx *ctx;

        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return NULL;

        ctx->sample_interval = 5 * 1000;
        ctx->aggr_interval = 100 * 1000;
        ctx->ops_update_interval = 60 * 1000 * 1000;

        ktime_get_coarse_ts64(&ctx->last_aggregation);
        ctx->last_ops_update = ctx->last_aggregation;

        mutex_init(&ctx->kdamond_lock);

        ctx->min_nr_regions = 10;
        ctx->max_nr_regions = 1000;

        INIT_LIST_HEAD(&ctx->adaptive_targets);
        INIT_LIST_HEAD(&ctx->schemes);

        return ctx;
}

static void damon_destroy_targets(struct damon_ctx *ctx)
{
        struct damon_target *t, *next_t;

        if (ctx->ops.cleanup) {
                ctx->ops.cleanup(ctx);
                return;
        }

        damon_for_each_target_safe(t, next_t, ctx)
                damon_destroy_target(t);
}

void damon_destroy_ctx(struct damon_ctx *ctx)
{
        struct damos *s, *next_s;

        damon_destroy_targets(ctx);

        damon_for_each_scheme_safe(s, next_s, ctx)
                damon_destroy_scheme(s);

        kfree(ctx);
}

/**
 * damon_set_attrs() - Set attributes for the monitoring.
 * @ctx: monitoring context
 * @sample_int: time interval between samplings
 * @aggr_int: time interval between aggregations
 * @ops_upd_int: time interval between monitoring operations updates
 * @min_nr_reg: minimum number of regions
 * @max_nr_reg: maximum number of regions
 *
 * This function should not be called while the kdamond is running.
 * Every time interval is in micro-seconds.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
                unsigned long aggr_int, unsigned long ops_upd_int,
                unsigned long min_nr_reg, unsigned long max_nr_reg)
{
        if (min_nr_reg < 3)
                return -EINVAL;
        if (min_nr_reg > max_nr_reg)
                return -EINVAL;

        ctx->sample_interval = sample_int;
        ctx->aggr_interval = aggr_int;
        ctx->ops_update_interval = ops_upd_int;
        ctx->min_nr_regions = min_nr_reg;
        ctx->max_nr_regions = max_nr_reg;

        return 0;
}
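/*
 * Illustrative sketch (not part of this file): a hypothetical in-kernel user
 * could prepare a context roughly as follows.  The attribute values simply
 * restate the defaults set by damon_new_ctx() (intervals in microseconds),
 * and 'target_pid' is an assumed pid_t of the process to monitor.
 *
 *        struct damon_ctx *ctx = damon_new_ctx();
 *        struct damon_target *t;
 *
 *        if (!ctx)
 *                return -ENOMEM;
 *        if (damon_select_ops(ctx, DAMON_OPS_VADDR))
 *                goto out_ctx;
 *        if (damon_set_attrs(ctx, 5000, 100000, 60000000, 10, 1000))
 *                goto out_ctx;
 *        t = damon_new_target();
 *        if (!t)
 *                goto out_ctx;
 *        t->pid = find_get_pid(target_pid);
 *        damon_add_target(ctx, t);
 */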
/**
 * damon_set_schemes() - Set data access monitoring based operation schemes.
 * @ctx: monitoring context
 * @schemes: array of the schemes
 * @nr_schemes: number of entries in @schemes
 *
 * This function should not be called while the kdamond of the context is
 * running.
 *
 * Return: 0 on success, or negative error code otherwise.
 */
int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
                        ssize_t nr_schemes)
{
        struct damos *s, *next;
        ssize_t i;

        damon_for_each_scheme_safe(s, next, ctx)
                damon_destroy_scheme(s);
        for (i = 0; i < nr_schemes; i++)
                damon_add_scheme(ctx, schemes[i]);
        return 0;
}

/**
 * damon_nr_running_ctxs() - Return number of currently running contexts.
 */
int damon_nr_running_ctxs(void)
{
        int nr_ctxs;

        mutex_lock(&damon_lock);
        nr_ctxs = nr_running_ctxs;
        mutex_unlock(&damon_lock);

        return nr_ctxs;
}

/* Returns the size upper limit for each monitoring region */
static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
{
        struct damon_target *t;
        struct damon_region *r;
        unsigned long sz = 0;

        damon_for_each_target(t, ctx) {
                damon_for_each_region(r, t)
                        sz += r->ar.end - r->ar.start;
        }

        if (ctx->min_nr_regions)
                sz /= ctx->min_nr_regions;
        if (sz < DAMON_MIN_REGION)
                sz = DAMON_MIN_REGION;

        return sz;
}

static int kdamond_fn(void *data);

/*
 * __damon_start() - Starts monitoring with the given context.
 * @ctx: monitoring context
 *
 * This function should be called while damon_lock is held.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_start(struct damon_ctx *ctx)
{
        int err = -EBUSY;

        mutex_lock(&ctx->kdamond_lock);
        if (!ctx->kdamond) {
                err = 0;
                ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
                                nr_running_ctxs);
                if (IS_ERR(ctx->kdamond)) {
                        err = PTR_ERR(ctx->kdamond);
                        ctx->kdamond = NULL;
                }
        }
        mutex_unlock(&ctx->kdamond_lock);

        return err;
}

/**
 * damon_start() - Starts monitoring for a given group of contexts.
 * @ctxs: an array of the pointers for contexts to start monitoring
 * @nr_ctxs: size of @ctxs
 * @exclusive: exclusiveness of this group of contexts
 *
 * This function starts a group of monitoring threads for a group of
 * monitoring contexts.  One thread per context is created and run in
 * parallel.  The caller should handle synchronization between the threads by
 * itself.  If @exclusive is true and a group of threads that was created by
 * another 'damon_start()' call is currently running, this function does
 * nothing but returns -EBUSY.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive)
{
        int i;
        int err = 0;

        mutex_lock(&damon_lock);
        if ((exclusive && nr_running_ctxs) ||
                        (!exclusive && running_exclusive_ctxs)) {
                mutex_unlock(&damon_lock);
                return -EBUSY;
        }

        for (i = 0; i < nr_ctxs; i++) {
                err = __damon_start(ctxs[i]);
                if (err)
                        break;
                nr_running_ctxs++;
        }
        if (exclusive && nr_running_ctxs)
                running_exclusive_ctxs = true;
        mutex_unlock(&damon_lock);

        return err;
}
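/*
 * Illustrative sketch (not part of this file): starting a single exclusive
 * context that was prepared as in the earlier sketches, and stopping it
 * later with damon_stop() defined below.  'ctx' and 'err' are assumed local
 * variables of the hypothetical caller.
 *
 *        struct damon_ctx *ctxs[1] = { ctx };
 *
 *        err = damon_start(ctxs, 1, true);
 *        if (err)
 *                return err;
 *        ...
 *        err = damon_stop(ctxs, 1);
 */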
/*
 * __damon_stop() - Stops monitoring of a given context.
 * @ctx: monitoring context
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_stop(struct damon_ctx *ctx)
{
        struct task_struct *tsk;

        mutex_lock(&ctx->kdamond_lock);
        tsk = ctx->kdamond;
        if (tsk) {
                get_task_struct(tsk);
                mutex_unlock(&ctx->kdamond_lock);
                kthread_stop(tsk);
                put_task_struct(tsk);
                return 0;
        }
        mutex_unlock(&ctx->kdamond_lock);

        return -EPERM;
}

/**
 * damon_stop() - Stops monitoring for a given group of contexts.
 * @ctxs: an array of the pointers for contexts to stop monitoring
 * @nr_ctxs: size of @ctxs
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
        int i, err = 0;

        for (i = 0; i < nr_ctxs; i++) {
                /* nr_running_ctxs is decremented in kdamond_fn */
                err = __damon_stop(ctxs[i]);
                if (err)
                        break;
        }
        return err;
}

/*
 * damon_check_reset_time_interval() - Check if a time interval has elapsed.
 * @baseline: the time to check whether the interval has elapsed since
 * @interval: the time interval (microseconds)
 *
 * See whether the given time interval has passed since the given baseline
 * time.  If so, it also updates the baseline to the current time for the
 * next check.
 *
 * Return: true if the time interval has passed, or false otherwise.
 */
static bool damon_check_reset_time_interval(struct timespec64 *baseline,
                unsigned long interval)
{
        struct timespec64 now;

        ktime_get_coarse_ts64(&now);
        if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
                        interval * 1000)
                return false;
        *baseline = now;
        return true;
}

/*
 * Check whether it is time to flush the aggregated information
 */
static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
{
        return damon_check_reset_time_interval(&ctx->last_aggregation,
                        ctx->aggr_interval);
}
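/*
 * Worked example for the interval check above (illustrative): with the
 * default aggregation interval of 100,000 microseconds set by
 * damon_new_ctx(), the comparison is against 100,000 * 1000 == 100,000,000
 * nanoseconds, so the aggregation work in kdamond_fn() runs roughly once per
 * 100 ms of elapsed time.
 */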
/*
 * Reset the aggregated monitoring results ('nr_accesses' of each region).
 */
static void kdamond_reset_aggregated(struct damon_ctx *c)
{
        struct damon_target *t;
        unsigned int ti = 0;    /* target's index */

        damon_for_each_target(t, c) {
                struct damon_region *r;

                damon_for_each_region(r, t) {
                        trace_damon_aggregated(t, ti, r, damon_nr_regions(t));
                        r->last_nr_accesses = r->nr_accesses;
                        r->nr_accesses = 0;
                }
                ti++;
        }
}

static void damon_split_region_at(struct damon_ctx *ctx,
                struct damon_target *t, struct damon_region *r,
                unsigned long sz_r);

static bool __damos_valid_target(struct damon_region *r, struct damos *s)
{
        unsigned long sz;

        sz = r->ar.end - r->ar.start;
        return s->min_sz_region <= sz && sz <= s->max_sz_region &&
                s->min_nr_accesses <= r->nr_accesses &&
                r->nr_accesses <= s->max_nr_accesses &&
                s->min_age_region <= r->age && r->age <= s->max_age_region;
}

static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
                struct damon_region *r, struct damos *s)
{
        bool ret = __damos_valid_target(r, s);

        if (!ret || !s->quota.esz || !c->ops.get_scheme_score)
                return ret;

        return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
}

static void damon_do_apply_schemes(struct damon_ctx *c,
                                   struct damon_target *t,
                                   struct damon_region *r)
{
        struct damos *s;

        damon_for_each_scheme(s, c) {
                struct damos_quota *quota = &s->quota;
                unsigned long sz = r->ar.end - r->ar.start;
                struct timespec64 begin, end;
                unsigned long sz_applied = 0;

                if (!s->wmarks.activated)
                        continue;

                /* Check the quota */
                if (quota->esz && quota->charged_sz >= quota->esz)
                        continue;

                /* Skip previously charged regions */
                if (quota->charge_target_from) {
                        if (t != quota->charge_target_from)
                                continue;
                        if (r == damon_last_region(t)) {
                                quota->charge_target_from = NULL;
                                quota->charge_addr_from = 0;
                                continue;
                        }
                        if (quota->charge_addr_from &&
                                        r->ar.end <= quota->charge_addr_from)
                                continue;

                        if (quota->charge_addr_from && r->ar.start <
                                        quota->charge_addr_from) {
                                sz = ALIGN_DOWN(quota->charge_addr_from -
                                                r->ar.start, DAMON_MIN_REGION);
                                if (!sz) {
                                        if (r->ar.end - r->ar.start <=
                                                        DAMON_MIN_REGION)
                                                continue;
                                        sz = DAMON_MIN_REGION;
                                }
                                damon_split_region_at(c, t, r, sz);
                                r = damon_next_region(r);
                                sz = r->ar.end - r->ar.start;
                        }
                        quota->charge_target_from = NULL;
                        quota->charge_addr_from = 0;
                }

                if (!damos_valid_target(c, t, r, s))
                        continue;

                /* Apply the scheme */
                if (c->ops.apply_scheme) {
                        if (quota->esz &&
                                        quota->charged_sz + sz > quota->esz) {
                                sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
                                                DAMON_MIN_REGION);
                                if (!sz)
                                        goto update_stat;
                                damon_split_region_at(c, t, r, sz);
                        }
                        ktime_get_coarse_ts64(&begin);
                        sz_applied = c->ops.apply_scheme(c, t, r, s);
                        ktime_get_coarse_ts64(&end);
                        quota->total_charged_ns += timespec64_to_ns(&end) -
                                timespec64_to_ns(&begin);
                        quota->charged_sz += sz;
                        if (quota->esz && quota->charged_sz >= quota->esz) {
                                quota->charge_target_from = t;
                                quota->charge_addr_from = r->ar.end + 1;
                        }
                }
                if (s->action != DAMOS_STAT)
                        r->age = 0;

update_stat:
                s->stat.nr_tried++;
                s->stat.sz_tried += sz;
                if (sz_applied)
                        s->stat.nr_applied++;
                s->stat.sz_applied += sz_applied;
        }
}
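/*
 * Summary of the quota charging above (descriptive comment with illustrative
 * numbers): with an effective quota (esz) of 1 MiB and DAMON_MIN_REGION of
 * 4 KiB, a region that would overrun the remaining budget is first split so
 * that only an ALIGN_DOWN(remaining budget, 4 KiB) sized head is acted on.
 * Once charged_sz reaches esz, the target and the end address of the last
 * charged region are recorded in charge_target_from/charge_addr_from, so the
 * next charge window resumes right after that point instead of repeatedly
 * penalizing the same regions.
 */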
/* Shouldn't be called if quota->ms and quota->sz are zero */
static void damos_set_effective_quota(struct damos_quota *quota)
{
        unsigned long throughput;
        unsigned long esz;

        if (!quota->ms) {
                quota->esz = quota->sz;
                return;
        }

        if (quota->total_charged_ns)
                throughput = quota->total_charged_sz * 1000000 /
                        quota->total_charged_ns;
        else
                throughput = PAGE_SIZE * 1024;
        esz = throughput * quota->ms;

        if (quota->sz && quota->sz < esz)
                esz = quota->sz;
        quota->esz = esz;
}

static void kdamond_apply_schemes(struct damon_ctx *c)
{
        struct damon_target *t;
        struct damon_region *r, *next_r;
        struct damos *s;

        damon_for_each_scheme(s, c) {
                struct damos_quota *quota = &s->quota;
                unsigned long cumulated_sz;
                unsigned int score, max_score = 0;

                if (!s->wmarks.activated)
                        continue;

                if (!quota->ms && !quota->sz)
                        continue;

                /* New charge window starts */
                if (time_after_eq(jiffies, quota->charged_from +
                                        msecs_to_jiffies(
                                                quota->reset_interval))) {
                        if (quota->esz && quota->charged_sz >= quota->esz)
                                s->stat.qt_exceeds++;
                        quota->total_charged_sz += quota->charged_sz;
                        quota->charged_from = jiffies;
                        quota->charged_sz = 0;
                        damos_set_effective_quota(quota);
                }

                if (!c->ops.get_scheme_score)
                        continue;

                /* Fill up the score histogram */
                memset(quota->histogram, 0, sizeof(quota->histogram));
                damon_for_each_target(t, c) {
                        damon_for_each_region(r, t) {
                                if (!__damos_valid_target(r, s))
                                        continue;
                                score = c->ops.get_scheme_score(
                                                c, t, r, s);
                                quota->histogram[score] +=
                                        r->ar.end - r->ar.start;
                                if (score > max_score)
                                        max_score = score;
                        }
                }

                /* Set the min score limit */
                for (cumulated_sz = 0, score = max_score; ; score--) {
                        cumulated_sz += quota->histogram[score];
                        if (cumulated_sz >= quota->esz || !score)
                                break;
                }
                quota->min_score = score;
        }

        damon_for_each_target(t, c) {
                damon_for_each_region_safe(r, next_r, t)
                        damon_do_apply_schemes(c, t, r);
        }
}

static inline unsigned long sz_damon_region(struct damon_region *r)
{
        return r->ar.end - r->ar.start;
}

/*
 * Merge two adjacent regions into one region
 */
static void damon_merge_two_regions(struct damon_target *t,
                struct damon_region *l, struct damon_region *r)
{
        unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);

        l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
                        (sz_l + sz_r);
        l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
        l->ar.end = r->ar.end;
        damon_destroy_region(r, t);
}

/*
 * Merge adjacent regions having similar access frequencies
 *
 * t		target affected by this merge operation
 * thres	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 */
static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
                                   unsigned long sz_limit)
{
        struct damon_region *r, *prev = NULL, *next;

        damon_for_each_region_safe(r, next, t) {
                if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
                        r->age = 0;
                else
                        r->age++;

                if (prev && prev->ar.end == r->ar.start &&
                    abs(prev->nr_accesses - r->nr_accesses) <= thres &&
                    sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
                        damon_merge_two_regions(t, prev, r);
                else
                        prev = r;
        }
}
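/*
 * Worked example for damon_merge_two_regions() above (illustrative numbers):
 * merging an 8 KiB region with nr_accesses == 4 into a 24 KiB neighbor with
 * nr_accesses == 0 gives (0 * 24K + 4 * 8K) / 32K == 1, so the combined
 * region keeps a size-weighted average of the two access counts.  The age is
 * averaged the same way.
 */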
/*
 * Merge adjacent regions having similar access frequencies
 *
 * threshold	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 *
 * This function merges monitoring target regions which are adjacent and
 * whose access frequencies are similar.  This is for minimizing the
 * monitoring overhead under the dynamically changeable access pattern.  If a
 * merge was unnecessarily made, later 'kdamond_split_regions()' will revert
 * it.
 */
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
                                  unsigned long sz_limit)
{
        struct damon_target *t;

        damon_for_each_target(t, c)
                damon_merge_regions_of(t, threshold, sz_limit);
}

/*
 * Split a region in two
 *
 * r		the region to be split
 * sz_r		size of the first sub-region that will be made
 */
static void damon_split_region_at(struct damon_ctx *ctx,
                struct damon_target *t, struct damon_region *r,
                unsigned long sz_r)
{
        struct damon_region *new;

        new = damon_new_region(r->ar.start + sz_r, r->ar.end);
        if (!new)
                return;

        r->ar.end = new->ar.start;

        new->age = r->age;
        new->last_nr_accesses = r->last_nr_accesses;

        damon_insert_region(new, r, damon_next_region(r), t);
}

/* Split every region in the given target into 'nr_subs' regions */
static void damon_split_regions_of(struct damon_ctx *ctx,
                                   struct damon_target *t, int nr_subs)
{
        struct damon_region *r, *next;
        unsigned long sz_region, sz_sub = 0;
        int i;

        damon_for_each_region_safe(r, next, t) {
                sz_region = r->ar.end - r->ar.start;

                for (i = 0; i < nr_subs - 1 &&
                                sz_region > 2 * DAMON_MIN_REGION; i++) {
                        /*
                         * Randomly select size of left sub-region to be at
                         * least 10% and at most 90% of the original region
                         */
                        sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
                                        sz_region / 10, DAMON_MIN_REGION);
                        /* Do not allow blank region */
                        if (sz_sub == 0 || sz_sub >= sz_region)
                                continue;

                        damon_split_region_at(ctx, t, r, sz_sub);
                        sz_region = sz_sub;
                }
        }
}

/*
 * Split every target region into randomly-sized small regions
 *
 * This function splits every target region into random-sized small regions
 * if the current total number of the regions is equal to or smaller than
 * half of the user-specified maximum number of regions.  This is for
 * maximizing the monitoring accuracy under the dynamically changeable access
 * patterns.  If a split was unnecessarily made, later
 * 'kdamond_merge_regions()' will revert it.
 */
static void kdamond_split_regions(struct damon_ctx *ctx)
{
        struct damon_target *t;
        unsigned int nr_regions = 0;
        static unsigned int last_nr_regions;
        int nr_subregions = 2;

        damon_for_each_target(t, ctx)
                nr_regions += damon_nr_regions(t);

        if (nr_regions > ctx->max_nr_regions / 2)
                return;

        /* Maybe the middle of the region has different access frequency */
        if (last_nr_regions == nr_regions &&
                        nr_regions < ctx->max_nr_regions / 3)
                nr_subregions = 3;

        damon_for_each_target(t, ctx)
                damon_split_regions_of(ctx, t, nr_subregions);

        last_nr_regions = nr_regions;
}
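/*
 * Worked example for the region split above (illustrative numbers): with
 * DAMON_MIN_REGION of 4 KiB, a 1 MiB region, and damon_rand(1, 10) returning
 * 3, the left sub-region becomes ALIGN_DOWN(3 * 1M / 10, 4K) == 304 KiB,
 * while the remaining 720 KiB goes to the newly inserted right sub-region.
 */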
/*
 * Check whether it is time to check and apply the operations-related data
 * structures.
 *
 * Returns true if it is.
 */
static bool kdamond_need_update_operations(struct damon_ctx *ctx)
{
        return damon_check_reset_time_interval(&ctx->last_ops_update,
                        ctx->ops_update_interval);
}

/*
 * Check whether current monitoring should be stopped
 *
 * The monitoring is stopped when either the user requested to stop, or all
 * monitoring targets are invalid.
 *
 * Returns true if the current monitoring should be stopped.
 */
static bool kdamond_need_stop(struct damon_ctx *ctx)
{
        struct damon_target *t;

        if (kthread_should_stop())
                return true;

        if (!ctx->ops.target_valid)
                return false;

        damon_for_each_target(t, ctx) {
                if (ctx->ops.target_valid(t))
                        return false;
        }

        return true;
}

static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
{
        struct sysinfo i;

        switch (metric) {
        case DAMOS_WMARK_FREE_MEM_RATE:
                si_meminfo(&i);
                return i.freeram * 1000 / i.totalram;
        default:
                break;
        }
        return -EINVAL;
}

/*
 * Returns zero if the scheme is active.  Else, returns time to wait for next
 * watermark check in micro-seconds.
 */
static unsigned long damos_wmark_wait_us(struct damos *scheme)
{
        unsigned long metric;

        if (scheme->wmarks.metric == DAMOS_WMARK_NONE)
                return 0;

        metric = damos_wmark_metric_value(scheme->wmarks.metric);
        /* higher than high watermark or lower than low watermark */
        if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
                if (scheme->wmarks.activated)
                        pr_debug("deactivate a scheme (%d) for %s wmark\n",
                                        scheme->action,
                                        metric > scheme->wmarks.high ?
                                        "high" : "low");
                scheme->wmarks.activated = false;
                return scheme->wmarks.interval;
        }

        /* inactive and higher than middle watermark */
        if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
                        !scheme->wmarks.activated)
                return scheme->wmarks.interval;

        if (!scheme->wmarks.activated)
                pr_debug("activate a scheme (%d)\n", scheme->action);
        scheme->wmarks.activated = true;
        return 0;
}

static void kdamond_usleep(unsigned long usecs)
{
        /* See Documentation/timers/timers-howto.rst for the thresholds */
        if (usecs > 20 * USEC_PER_MSEC)
                schedule_timeout_idle(usecs_to_jiffies(usecs));
        else
                usleep_idle_range(usecs, usecs + 1);
}

/* Returns negative error code if it's not activated but should return */
static int kdamond_wait_activation(struct damon_ctx *ctx)
{
        struct damos *s;
        unsigned long wait_time;
        unsigned long min_wait_time = 0;
        bool init_wait_time = false;

        while (!kdamond_need_stop(ctx)) {
                damon_for_each_scheme(s, ctx) {
                        wait_time = damos_wmark_wait_us(s);
                        if (!init_wait_time || wait_time < min_wait_time) {
                                init_wait_time = true;
                                min_wait_time = wait_time;
                        }
                }
                if (!min_wait_time)
                        return 0;

                kdamond_usleep(min_wait_time);
        }
        return -EBUSY;
}
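/*
 * Note on the watermark logic above (descriptive comment, illustrative
 * numbers): DAMOS_WMARK_FREE_MEM_RATE is expressed in per-thousand of total
 * memory.  With high/mid/low watermarks of 500/400/200, a system with 30% of
 * its RAM free (metric == 300) keeps an already-active scheme active, while
 * one with 15% free (metric == 150) deactivates the scheme and rechecks only
 * after wmarks.interval microseconds.
 */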
/*
 * The monitoring daemon that runs as a kernel thread
 */
static int kdamond_fn(void *data)
{
        struct damon_ctx *ctx = (struct damon_ctx *)data;
        struct damon_target *t;
        struct damon_region *r, *next;
        unsigned int max_nr_accesses = 0;
        unsigned long sz_limit = 0;
        bool done = false;

        pr_debug("kdamond (%d) starts\n", current->pid);

        if (ctx->ops.init)
                ctx->ops.init(ctx);
        if (ctx->callback.before_start && ctx->callback.before_start(ctx))
                done = true;

        sz_limit = damon_region_sz_limit(ctx);

        while (!kdamond_need_stop(ctx) && !done) {
                if (kdamond_wait_activation(ctx))
                        continue;

                if (ctx->ops.prepare_access_checks)
                        ctx->ops.prepare_access_checks(ctx);
                if (ctx->callback.after_sampling &&
                                ctx->callback.after_sampling(ctx))
                        done = true;

                kdamond_usleep(ctx->sample_interval);

                if (ctx->ops.check_accesses)
                        max_nr_accesses = ctx->ops.check_accesses(ctx);

                if (kdamond_aggregate_interval_passed(ctx)) {
                        kdamond_merge_regions(ctx,
                                        max_nr_accesses / 10,
                                        sz_limit);
                        if (ctx->callback.after_aggregation &&
                                        ctx->callback.after_aggregation(ctx))
                                done = true;
                        kdamond_apply_schemes(ctx);
                        kdamond_reset_aggregated(ctx);
                        kdamond_split_regions(ctx);
                        if (ctx->ops.reset_aggregated)
                                ctx->ops.reset_aggregated(ctx);
                }

                if (kdamond_need_update_operations(ctx)) {
                        if (ctx->ops.update)
                                ctx->ops.update(ctx);
                        sz_limit = damon_region_sz_limit(ctx);
                }
        }
        damon_for_each_target(t, ctx) {
                damon_for_each_region_safe(r, next, t)
                        damon_destroy_region(r, t);
        }

        if (ctx->callback.before_terminate)
                ctx->callback.before_terminate(ctx);
        if (ctx->ops.cleanup)
                ctx->ops.cleanup(ctx);

        pr_debug("kdamond (%d) finishes\n", current->pid);
        mutex_lock(&ctx->kdamond_lock);
        ctx->kdamond = NULL;
        mutex_unlock(&ctx->kdamond_lock);

        mutex_lock(&damon_lock);
        nr_running_ctxs--;
        if (!nr_running_ctxs && running_exclusive_ctxs)
                running_exclusive_ctxs = false;
        mutex_unlock(&damon_lock);

        return 0;
}

#include "core-test.h"