// SPDX-License-Identifier: GPL-2.0
/*
 * Data Access Monitor
 *
 * Author: SeongJae Park <sjpark@amazon.de>
 */

#define pr_fmt(fmt) "damon: " fmt

#include <linux/damon.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>

#define CREATE_TRACE_POINTS
#include <trace/events/damon.h>

#ifdef CONFIG_DAMON_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif

static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;
static bool running_exclusive_ctxs;

static DEFINE_MUTEX(damon_ops_lock);
static struct damon_operations damon_registered_ops[NR_DAMON_OPS];

/* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */
static bool __damon_is_registered_ops(enum damon_ops_id id)
{
	struct damon_operations empty_ops = {};

	if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops)))
		return false;
	return true;
}

/**
 * damon_is_registered_ops() - Check if a given damon_operations is registered.
 * @id:	Id of the damon_operations to check if registered.
 *
 * Return: true if the ops is set, false otherwise.
 */
bool damon_is_registered_ops(enum damon_ops_id id)
{
	bool registered;

	if (id >= NR_DAMON_OPS)
		return false;
	mutex_lock(&damon_ops_lock);
	registered = __damon_is_registered_ops(id);
	mutex_unlock(&damon_ops_lock);
	return registered;
}

/**
 * damon_register_ops() - Register a monitoring operations set to DAMON.
 * @ops:	monitoring operations set to register.
 *
 * This function registers a monitoring operations set having a valid &struct
 * damon_operations->id so that others can find and use it later.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_register_ops(struct damon_operations *ops)
{
	int err = 0;

	if (ops->id >= NR_DAMON_OPS)
		return -EINVAL;
	mutex_lock(&damon_ops_lock);
	/* Fail for already registered ops */
	if (__damon_is_registered_ops(ops->id)) {
		err = -EINVAL;
		goto out;
	}
	damon_registered_ops[ops->id] = *ops;
out:
	mutex_unlock(&damon_ops_lock);
	return err;
}

/**
 * damon_select_ops() - Select a monitoring operations set to use with the
 * context.
 * @ctx:	monitoring context to use the operations.
 * @id:		id of the registered monitoring operations to select.
 *
 * This function finds the registered monitoring operations set of @id and
 * makes @ctx use it.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id)
{
	int err = 0;

	if (id >= NR_DAMON_OPS)
		return -EINVAL;

	mutex_lock(&damon_ops_lock);
	if (!__damon_is_registered_ops(id))
		err = -EINVAL;
	else
		ctx->ops = damon_registered_ops[id];
	mutex_unlock(&damon_ops_lock);
	return err;
}
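/*
 * Illustrative usage sketch (kept as a comment, not compiled): a monitoring
 * operations provider fills a &struct damon_operations with its callbacks
 * and registers it once, and users later bind a registered set to their
 * context.  The 'example_*' callback names are hypothetical placeholders,
 * and DAMON_OPS_VADDR is assumed to be one of the ids declared in
 * include/linux/damon.h; only fields this file actually invokes are shown.
 *
 *	static struct damon_operations example_ops = {
 *		.id = DAMON_OPS_VADDR,
 *		.init = example_init,
 *		.update = example_update,
 *		.prepare_access_checks = example_prepare_access_checks,
 *		.check_accesses = example_check_accesses,
 *		.reset_aggregated = example_reset_aggregated,
 *		.target_valid = example_target_valid,
 *		.cleanup = example_cleanup,
 *	};
 *
 *	err = damon_register_ops(&example_ops);
 *	if (!err)
 *		err = damon_select_ops(ctx, DAMON_OPS_VADDR);
 *
 * damon_register_ops() rejects ids at or above NR_DAMON_OPS and already
 * registered ids with -EINVAL; damon_select_ops() copies the registered set
 * into @ctx->ops under damon_ops_lock.
 */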
/*
 * Construct a damon_region struct
 *
 * Returns the pointer to the new struct on success, or NULL otherwise
 */
struct damon_region *damon_new_region(unsigned long start, unsigned long end)
{
	struct damon_region *region;

	region = kmalloc(sizeof(*region), GFP_KERNEL);
	if (!region)
		return NULL;

	region->ar.start = start;
	region->ar.end = end;
	region->nr_accesses = 0;
	INIT_LIST_HEAD(&region->list);

	region->age = 0;
	region->last_nr_accesses = 0;

	return region;
}

void damon_add_region(struct damon_region *r, struct damon_target *t)
{
	list_add_tail(&r->list, &t->regions_list);
	t->nr_regions++;
}

static void damon_del_region(struct damon_region *r, struct damon_target *t)
{
	list_del(&r->list);
	t->nr_regions--;
}

static void damon_free_region(struct damon_region *r)
{
	kfree(r);
}

void damon_destroy_region(struct damon_region *r, struct damon_target *t)
{
	damon_del_region(r, t);
	damon_free_region(r);
}

struct damos *damon_new_scheme(
		unsigned long min_sz_region, unsigned long max_sz_region,
		unsigned int min_nr_accesses, unsigned int max_nr_accesses,
		unsigned int min_age_region, unsigned int max_age_region,
		enum damos_action action, struct damos_quota *quota,
		struct damos_watermarks *wmarks)
{
	struct damos *scheme;

	scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
	if (!scheme)
		return NULL;
	scheme->min_sz_region = min_sz_region;
	scheme->max_sz_region = max_sz_region;
	scheme->min_nr_accesses = min_nr_accesses;
	scheme->max_nr_accesses = max_nr_accesses;
	scheme->min_age_region = min_age_region;
	scheme->max_age_region = max_age_region;
	scheme->action = action;
	scheme->stat = (struct damos_stat){};
	INIT_LIST_HEAD(&scheme->list);

	scheme->quota.ms = quota->ms;
	scheme->quota.sz = quota->sz;
	scheme->quota.reset_interval = quota->reset_interval;
	scheme->quota.weight_sz = quota->weight_sz;
	scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
	scheme->quota.weight_age = quota->weight_age;
	scheme->quota.total_charged_sz = 0;
	scheme->quota.total_charged_ns = 0;
	scheme->quota.esz = 0;
	scheme->quota.charged_sz = 0;
	scheme->quota.charged_from = 0;
	scheme->quota.charge_target_from = NULL;
	scheme->quota.charge_addr_from = 0;

	scheme->wmarks.metric = wmarks->metric;
	scheme->wmarks.interval = wmarks->interval;
	scheme->wmarks.high = wmarks->high;
	scheme->wmarks.mid = wmarks->mid;
	scheme->wmarks.low = wmarks->low;
	scheme->wmarks.activated = true;

	return scheme;
}

void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
{
	list_add_tail(&s->list, &ctx->schemes);
}

static void damon_del_scheme(struct damos *s)
{
	list_del(&s->list);
}

static void damon_free_scheme(struct damos *s)
{
	kfree(s);
}

void damon_destroy_scheme(struct damos *s)
{
	damon_del_scheme(s);
	damon_free_scheme(s);
}
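/*
 * Illustrative usage sketch (kept as a comment, not compiled): a scheme that
 * only collects statistics (DAMOS_STAT) on regions of any access frequency
 * and age that are at least one page large, limited by a time quota and
 * gated by a free-memory-rate watermark.  The numbers are arbitrary; quota
 * times are in milliseconds, the watermark check interval is in
 * microseconds, and the free memory rate metric is per-thousand of total
 * memory (see damos_wmark_metric_value() below).
 *
 *	struct damos_quota quota = {
 *		.ms = 100, .sz = 0, .reset_interval = 1000,
 *		.weight_sz = 1, .weight_nr_accesses = 1, .weight_age = 1,
 *	};
 *	struct damos_watermarks wmarks = {
 *		.metric = DAMOS_WMARK_FREE_MEM_RATE,
 *		.interval = 5 * 1000 * 1000,
 *		.high = 500, .mid = 400, .low = 200,
 *	};
 *	struct damos *s = damon_new_scheme(PAGE_SIZE, ULONG_MAX,
 *			0, UINT_MAX, 0, UINT_MAX, DAMOS_STAT,
 *			&quota, &wmarks);
 *
 *	if (s)
 *		damon_add_scheme(ctx, s);
 */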
/*
 * Construct a damon_target struct
 *
 * Returns the pointer to the new struct on success, or NULL otherwise
 */
struct damon_target *damon_new_target(void)
{
	struct damon_target *t;

	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return NULL;

	t->pid = NULL;
	t->nr_regions = 0;
	INIT_LIST_HEAD(&t->regions_list);

	return t;
}

void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
{
	list_add_tail(&t->list, &ctx->adaptive_targets);
}

bool damon_targets_empty(struct damon_ctx *ctx)
{
	return list_empty(&ctx->adaptive_targets);
}

static void damon_del_target(struct damon_target *t)
{
	list_del(&t->list);
}

void damon_free_target(struct damon_target *t)
{
	struct damon_region *r, *next;

	damon_for_each_region_safe(r, next, t)
		damon_free_region(r);
	kfree(t);
}

void damon_destroy_target(struct damon_target *t)
{
	damon_del_target(t);
	damon_free_target(t);
}

unsigned int damon_nr_regions(struct damon_target *t)
{
	return t->nr_regions;
}

struct damon_ctx *damon_new_ctx(void)
{
	struct damon_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	ctx->sample_interval = 5 * 1000;
	ctx->aggr_interval = 100 * 1000;
	ctx->ops_update_interval = 60 * 1000 * 1000;

	ktime_get_coarse_ts64(&ctx->last_aggregation);
	ctx->last_ops_update = ctx->last_aggregation;

	mutex_init(&ctx->kdamond_lock);

	ctx->min_nr_regions = 10;
	ctx->max_nr_regions = 1000;

	INIT_LIST_HEAD(&ctx->adaptive_targets);
	INIT_LIST_HEAD(&ctx->schemes);

	return ctx;
}

static void damon_destroy_targets(struct damon_ctx *ctx)
{
	struct damon_target *t, *next_t;

	if (ctx->ops.cleanup) {
		ctx->ops.cleanup(ctx);
		return;
	}

	damon_for_each_target_safe(t, next_t, ctx)
		damon_destroy_target(t);
}

void damon_destroy_ctx(struct damon_ctx *ctx)
{
	struct damos *s, *next_s;

	damon_destroy_targets(ctx);

	damon_for_each_scheme_safe(s, next_s, ctx)
		damon_destroy_scheme(s);

	kfree(ctx);
}

/**
 * damon_set_attrs() - Set attributes for the monitoring.
 * @ctx:		monitoring context
 * @sample_int:		time interval between samplings
 * @aggr_int:		time interval between aggregations
 * @ops_upd_int:	time interval between monitoring operations updates
 * @min_nr_reg:		minimal number of regions
 * @max_nr_reg:		maximum number of regions
 *
 * This function should not be called while the kdamond is running.
 * Every time interval is in micro-seconds.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
		    unsigned long aggr_int, unsigned long ops_upd_int,
		    unsigned long min_nr_reg, unsigned long max_nr_reg)
{
	if (min_nr_reg < 3)
		return -EINVAL;
	if (min_nr_reg > max_nr_reg)
		return -EINVAL;

	ctx->sample_interval = sample_int;
	ctx->aggr_interval = aggr_int;
	ctx->ops_update_interval = ops_upd_int;
	ctx->min_nr_regions = min_nr_reg;
	ctx->max_nr_regions = max_nr_reg;

	return 0;
}
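/*
 * Illustrative usage sketch (kept as a comment, not compiled): building a
 * context and setting explicit attributes.  All intervals are in
 * microseconds; the values below simply restate the damon_new_ctx()
 * defaults (5ms sampling, 100ms aggregation, 60s operations update, and
 * 10 to 1000 regions).
 *
 *	struct damon_ctx *ctx = damon_new_ctx();
 *
 *	if (!ctx)
 *		return -ENOMEM;
 *	err = damon_set_attrs(ctx, 5000, 100000, 60 * 1000 * 1000, 10, 1000);
 *	if (err)
 *		damon_destroy_ctx(ctx);
 */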
/**
 * damon_set_schemes() - Set data access monitoring based operation schemes.
 * @ctx:	monitoring context
 * @schemes:	array of the schemes
 * @nr_schemes:	number of entries in @schemes
 *
 * This function should not be called while the kdamond of the context is
 * running.
 *
 * Return: 0 on success, or negative error code otherwise.
 */
int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
			ssize_t nr_schemes)
{
	struct damos *s, *next;
	ssize_t i;

	damon_for_each_scheme_safe(s, next, ctx)
		damon_destroy_scheme(s);
	for (i = 0; i < nr_schemes; i++)
		damon_add_scheme(ctx, schemes[i]);
	return 0;
}

/**
 * damon_nr_running_ctxs() - Return number of currently running contexts.
 */
int damon_nr_running_ctxs(void)
{
	int nr_ctxs;

	mutex_lock(&damon_lock);
	nr_ctxs = nr_running_ctxs;
	mutex_unlock(&damon_lock);

	return nr_ctxs;
}

/* Returns the size upper limit for each monitoring region */
static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct damon_region *r;
	unsigned long sz = 0;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t)
			sz += r->ar.end - r->ar.start;
	}

	if (ctx->min_nr_regions)
		sz /= ctx->min_nr_regions;
	if (sz < DAMON_MIN_REGION)
		sz = DAMON_MIN_REGION;

	return sz;
}

static int kdamond_fn(void *data);

/*
 * __damon_start() - Starts monitoring with given context.
 * @ctx:	monitoring context
 *
 * This function should be called while damon_lock is held.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_start(struct damon_ctx *ctx)
{
	int err = -EBUSY;

	mutex_lock(&ctx->kdamond_lock);
	if (!ctx->kdamond) {
		err = 0;
		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
				nr_running_ctxs);
		if (IS_ERR(ctx->kdamond)) {
			err = PTR_ERR(ctx->kdamond);
			ctx->kdamond = NULL;
		}
	}
	mutex_unlock(&ctx->kdamond_lock);

	return err;
}

/**
 * damon_start() - Starts the monitoring for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to start monitoring
 * @nr_ctxs:	size of @ctxs
 * @exclusive:	exclusiveness of this contexts group
 *
 * This function starts a group of monitoring threads for a group of monitoring
 * contexts.  One thread per context is created and runs in parallel.  The
 * caller should handle synchronization between the threads by itself.  If
 * @exclusive is true and a group of threads that was created by another
 * 'damon_start()' call is currently running, this function does nothing but
 * returns -EBUSY.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive)
{
	int i;
	int err = 0;

	mutex_lock(&damon_lock);
	if ((exclusive && nr_running_ctxs) ||
			(!exclusive && running_exclusive_ctxs)) {
		mutex_unlock(&damon_lock);
		return -EBUSY;
	}

	for (i = 0; i < nr_ctxs; i++) {
		err = __damon_start(ctxs[i]);
		if (err)
			break;
		nr_running_ctxs++;
	}
	if (exclusive && nr_running_ctxs)
		running_exclusive_ctxs = true;
	mutex_unlock(&damon_lock);

	return err;
}
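/*
 * Illustrative usage sketch (kept as a comment, not compiled): starting and
 * later stopping a single, exclusively running context.  A monitoring
 * operations set and at least one target are assumed to have been set up
 * already (see damon_select_ops() and damon_add_target() above).
 *
 *	struct damon_ctx *ctxs[1] = { ctx };
 *
 *	err = damon_start(ctxs, 1, true);
 *	...
 *	err = damon_stop(ctxs, 1);
 *
 * damon_start() spawns one 'kdamond' kthread per context and refuses to run
 * an exclusive group next to any other running group (returning -EBUSY);
 * damon_stop() asks each kdamond to stop, and the threads themselves
 * decrement nr_running_ctxs on exit.
 */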
/*
 * __damon_stop() - Stops monitoring of a given context.
 * @ctx:	monitoring context
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_stop(struct damon_ctx *ctx)
{
	struct task_struct *tsk;

	mutex_lock(&ctx->kdamond_lock);
	tsk = ctx->kdamond;
	if (tsk) {
		get_task_struct(tsk);
		mutex_unlock(&ctx->kdamond_lock);
		kthread_stop(tsk);
		put_task_struct(tsk);
		return 0;
	}
	mutex_unlock(&ctx->kdamond_lock);

	return -EPERM;
}

/**
 * damon_stop() - Stops the monitoring for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to stop monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i, err = 0;

	for (i = 0; i < nr_ctxs; i++) {
		/* nr_running_ctxs is decremented in kdamond_fn */
		err = __damon_stop(ctxs[i]);
		if (err)
			break;
	}
	return err;
}

/*
 * damon_check_reset_time_interval() - Check if a time interval is elapsed.
 * @baseline:	the time to check whether the interval has elapsed since
 * @interval:	the time interval (microseconds)
 *
 * See whether the given time interval has passed since the given baseline
 * time.  If so, it also updates the baseline to the current time for the next
 * check.
 *
 * Return: true if the time interval has passed, or false otherwise.
 */
static bool damon_check_reset_time_interval(struct timespec64 *baseline,
		unsigned long interval)
{
	struct timespec64 now;

	ktime_get_coarse_ts64(&now);
	if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
			interval * 1000)
		return false;
	*baseline = now;
	return true;
}

/*
 * Check whether it is time to flush the aggregated information
 */
static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_aggregation,
			ctx->aggr_interval);
}
/*
 * Reset the aggregated monitoring results ('nr_accesses' of each region).
 */
static void kdamond_reset_aggregated(struct damon_ctx *c)
{
	struct damon_target *t;
	unsigned int ti = 0;	/* target's index */

	damon_for_each_target(t, c) {
		struct damon_region *r;

		damon_for_each_region(r, t) {
			trace_damon_aggregated(t, ti, r, damon_nr_regions(t));
			r->last_nr_accesses = r->nr_accesses;
			r->nr_accesses = 0;
		}
		ti++;
	}
}

static void damon_split_region_at(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		unsigned long sz_r);

static bool __damos_valid_target(struct damon_region *r, struct damos *s)
{
	unsigned long sz;

	sz = r->ar.end - r->ar.start;
	return s->min_sz_region <= sz && sz <= s->max_sz_region &&
		s->min_nr_accesses <= r->nr_accesses &&
		r->nr_accesses <= s->max_nr_accesses &&
		s->min_age_region <= r->age && r->age <= s->max_age_region;
}

static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
		struct damon_region *r, struct damos *s)
{
	bool ret = __damos_valid_target(r, s);

	if (!ret || !s->quota.esz || !c->ops.get_scheme_score)
		return ret;

	return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
}

static void damon_do_apply_schemes(struct damon_ctx *c,
				   struct damon_target *t,
				   struct damon_region *r)
{
	struct damos *s;

	damon_for_each_scheme(s, c) {
		struct damos_quota *quota = &s->quota;
		unsigned long sz = r->ar.end - r->ar.start;
		struct timespec64 begin, end;
		unsigned long sz_applied = 0;

		if (!s->wmarks.activated)
			continue;

		/* Check the quota */
		if (quota->esz && quota->charged_sz >= quota->esz)
			continue;

		/* Skip previously charged regions */
		if (quota->charge_target_from) {
			if (t != quota->charge_target_from)
				continue;
			if (r == damon_last_region(t)) {
				quota->charge_target_from = NULL;
				quota->charge_addr_from = 0;
				continue;
			}
			if (quota->charge_addr_from &&
					r->ar.end <= quota->charge_addr_from)
				continue;

			if (quota->charge_addr_from && r->ar.start <
					quota->charge_addr_from) {
				sz = ALIGN_DOWN(quota->charge_addr_from -
						r->ar.start, DAMON_MIN_REGION);
				if (!sz) {
					if (r->ar.end - r->ar.start <=
							DAMON_MIN_REGION)
						continue;
					sz = DAMON_MIN_REGION;
				}
				damon_split_region_at(c, t, r, sz);
				r = damon_next_region(r);
				sz = r->ar.end - r->ar.start;
			}
			quota->charge_target_from = NULL;
			quota->charge_addr_from = 0;
		}

		if (!damos_valid_target(c, t, r, s))
			continue;

		/* Apply the scheme */
		if (c->ops.apply_scheme) {
			if (quota->esz &&
					quota->charged_sz + sz > quota->esz) {
				sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
						DAMON_MIN_REGION);
				if (!sz)
					goto update_stat;
				damon_split_region_at(c, t, r, sz);
			}
			ktime_get_coarse_ts64(&begin);
			sz_applied = c->ops.apply_scheme(c, t, r, s);
			ktime_get_coarse_ts64(&end);
			quota->total_charged_ns += timespec64_to_ns(&end) -
				timespec64_to_ns(&begin);
			quota->charged_sz += sz;
			if (quota->esz && quota->charged_sz >= quota->esz) {
				quota->charge_target_from = t;
				quota->charge_addr_from = r->ar.end + 1;
			}
		}
		if (s->action != DAMOS_STAT)
			r->age = 0;

update_stat:
		s->stat.nr_tried++;
		s->stat.sz_tried += sz;
		if (sz_applied)
			s->stat.nr_applied++;
		s->stat.sz_applied += sz_applied;
	}
}
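/*
 * Worked example for the effective quota calculation below, with
 * illustrative numbers: if a scheme so far consumed total_charged_ns =
 * 2,000,000 ns while applying total_charged_sz = 8,388,608 bytes (8 MiB),
 * the measured throughput is 8,388,608 * 1,000,000 / 2,000,000 = 4,194,304
 * bytes per millisecond.  With quota->ms = 10, the time quota alone allows
 * esz = 4,194,304 * 10 = 41,943,040 bytes per reset interval; if quota->sz
 * is set and smaller, esz is capped to quota->sz.  Without any charge
 * history, a conservative PAGE_SIZE * 1024 bytes-per-millisecond throughput
 * is assumed.
 */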
/* Shouldn't be called if quota->ms and quota->sz are zero */
static void damos_set_effective_quota(struct damos_quota *quota)
{
	unsigned long throughput;
	unsigned long esz;

	if (!quota->ms) {
		quota->esz = quota->sz;
		return;
	}

	if (quota->total_charged_ns)
		throughput = quota->total_charged_sz * 1000000 /
			quota->total_charged_ns;
	else
		throughput = PAGE_SIZE * 1024;
	esz = throughput * quota->ms;

	if (quota->sz && quota->sz < esz)
		esz = quota->sz;
	quota->esz = esz;
}

static void kdamond_apply_schemes(struct damon_ctx *c)
{
	struct damon_target *t;
	struct damon_region *r, *next_r;
	struct damos *s;

	damon_for_each_scheme(s, c) {
		struct damos_quota *quota = &s->quota;
		unsigned long cumulated_sz;
		unsigned int score, max_score = 0;

		if (!s->wmarks.activated)
			continue;

		if (!quota->ms && !quota->sz)
			continue;

		/* New charge window starts */
		if (time_after_eq(jiffies, quota->charged_from +
					msecs_to_jiffies(
						quota->reset_interval))) {
			if (quota->esz && quota->charged_sz >= quota->esz)
				s->stat.qt_exceeds++;
			quota->total_charged_sz += quota->charged_sz;
			quota->charged_from = jiffies;
			quota->charged_sz = 0;
			damos_set_effective_quota(quota);
		}

		if (!c->ops.get_scheme_score)
			continue;

		/* Fill up the score histogram */
		memset(quota->histogram, 0, sizeof(quota->histogram));
		damon_for_each_target(t, c) {
			damon_for_each_region(r, t) {
				if (!__damos_valid_target(r, s))
					continue;
				score = c->ops.get_scheme_score(
						c, t, r, s);
				quota->histogram[score] +=
					r->ar.end - r->ar.start;
				if (score > max_score)
					max_score = score;
			}
		}

		/* Set the min score limit */
		for (cumulated_sz = 0, score = max_score; ; score--) {
			cumulated_sz += quota->histogram[score];
			if (cumulated_sz >= quota->esz || !score)
				break;
		}
		quota->min_score = score;
	}

	damon_for_each_target(t, c) {
		damon_for_each_region_safe(r, next_r, t)
			damon_do_apply_schemes(c, t, r);
	}
}

static inline unsigned long sz_damon_region(struct damon_region *r)
{
	return r->ar.end - r->ar.start;
}

/*
 * Merge two adjacent regions into one region
 */
static void damon_merge_two_regions(struct damon_target *t,
		struct damon_region *l, struct damon_region *r)
{
	unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);

	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
			(sz_l + sz_r);
	l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
	l->ar.end = r->ar.end;
	damon_destroy_region(r, t);
}

/*
 * Merge adjacent regions having similar access frequencies
 *
 * t		target affected by this merge operation
 * thres	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 */
static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
				   unsigned long sz_limit)
{
	struct damon_region *r, *prev = NULL, *next;

	damon_for_each_region_safe(r, next, t) {
		if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
			r->age = 0;
		else
			r->age++;

		if (prev && prev->ar.end == r->ar.start &&
		    abs(prev->nr_accesses - r->nr_accesses) <= thres &&
		    sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
			damon_merge_two_regions(t, prev, r);
		else
			prev = r;
	}
}
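/*
 * Worked example for damon_merge_two_regions(), with illustrative numbers:
 * merging a 12 KiB neighbor with nr_accesses == 8 into an adjacent 4 KiB
 * region with nr_accesses == 4 yields one 16 KiB region with nr_accesses ==
 * (4 * 4096 + 8 * 12288) / 16384 == 7, i.e. the size-weighted average, so
 * larger regions dominate the merged statistics.
 */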
/*
 * Merge adjacent regions having similar access frequencies
 *
 * threshold	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 *
 * This function merges monitoring target regions which are adjacent and their
 * access frequencies are similar.  This is for minimizing the monitoring
 * overhead under the dynamically changeable access pattern.  If a merge was
 * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 */
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
				  unsigned long sz_limit)
{
	struct damon_target *t;

	damon_for_each_target(t, c)
		damon_merge_regions_of(t, threshold, sz_limit);
}

/*
 * Split a region in two
 *
 * r		the region to be split
 * sz_r		size of the first sub-region that will be made
 */
static void damon_split_region_at(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		unsigned long sz_r)
{
	struct damon_region *new;

	new = damon_new_region(r->ar.start + sz_r, r->ar.end);
	if (!new)
		return;

	r->ar.end = new->ar.start;

	new->age = r->age;
	new->last_nr_accesses = r->last_nr_accesses;

	damon_insert_region(new, r, damon_next_region(r), t);
}

/* Split every region in the given target into 'nr_subs' regions */
static void damon_split_regions_of(struct damon_ctx *ctx,
				   struct damon_target *t, int nr_subs)
{
	struct damon_region *r, *next;
	unsigned long sz_region, sz_sub = 0;
	int i;

	damon_for_each_region_safe(r, next, t) {
		sz_region = r->ar.end - r->ar.start;

		for (i = 0; i < nr_subs - 1 &&
				sz_region > 2 * DAMON_MIN_REGION; i++) {
			/*
			 * Randomly select size of left sub-region to be at
			 * least 10 percent and at most 90 percent of the
			 * original region
			 */
			sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
					sz_region / 10, DAMON_MIN_REGION);
			/* Do not allow blank region */
			if (sz_sub == 0 || sz_sub >= sz_region)
				continue;

			damon_split_region_at(ctx, t, r, sz_sub);
			sz_region = sz_sub;
		}
	}
}

/*
 * Split every target region into randomly-sized small regions
 *
 * This function splits every target region into random-sized small regions if
 * the current total number of the regions is equal to or smaller than half of
 * the user-specified maximum number of regions.  This is for maximizing the
 * monitoring accuracy under the dynamically changeable access patterns.  If a
 * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 * it.
 */
static void kdamond_split_regions(struct damon_ctx *ctx)
{
	struct damon_target *t;
	unsigned int nr_regions = 0;
	static unsigned int last_nr_regions;
	int nr_subregions = 2;

	damon_for_each_target(t, ctx)
		nr_regions += damon_nr_regions(t);

	if (nr_regions > ctx->max_nr_regions / 2)
		return;

	/* Maybe the middle of the region has different access frequency */
	if (last_nr_regions == nr_regions &&
			nr_regions < ctx->max_nr_regions / 3)
		nr_subregions = 3;

	damon_for_each_target(t, ctx)
		damon_split_regions_of(ctx, t, nr_subregions);

	last_nr_regions = nr_regions;
}
/*
 * Check whether it is time to check and apply the operations-related data
 * structures.
 *
 * Returns true if it is.
 */
static bool kdamond_need_update_operations(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_ops_update,
			ctx->ops_update_interval);
}

/*
 * Check whether current monitoring should be stopped
 *
 * The monitoring is stopped when either the user requested to stop, or all
 * monitoring targets are invalid.
 *
 * Returns true if need to stop current monitoring.
 */
static bool kdamond_need_stop(struct damon_ctx *ctx)
{
	struct damon_target *t;

	if (kthread_should_stop())
		return true;

	if (!ctx->ops.target_valid)
		return false;

	damon_for_each_target(t, ctx) {
		if (ctx->ops.target_valid(t))
			return false;
	}

	return true;
}

static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
{
	struct sysinfo i;

	switch (metric) {
	case DAMOS_WMARK_FREE_MEM_RATE:
		si_meminfo(&i);
		return i.freeram * 1000 / i.totalram;
	default:
		break;
	}
	return -EINVAL;
}

/*
 * Returns zero if the scheme is active.  Else, returns time to wait for next
 * watermark check in micro-seconds.
 */
static unsigned long damos_wmark_wait_us(struct damos *scheme)
{
	unsigned long metric;

	if (scheme->wmarks.metric == DAMOS_WMARK_NONE)
		return 0;

	metric = damos_wmark_metric_value(scheme->wmarks.metric);
	/* higher than high watermark or lower than low watermark */
	if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
		if (scheme->wmarks.activated)
			pr_debug("deactivate a scheme (%d) for %s wmark\n",
					scheme->action,
					metric > scheme->wmarks.high ?
					"high" : "low");
		scheme->wmarks.activated = false;
		return scheme->wmarks.interval;
	}

	/* inactive and higher than middle watermark */
	if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
			!scheme->wmarks.activated)
		return scheme->wmarks.interval;

	if (!scheme->wmarks.activated)
		pr_debug("activate a scheme (%d)\n", scheme->action);
	scheme->wmarks.activated = true;
	return 0;
}

static void kdamond_usleep(unsigned long usecs)
{
	/* See Documentation/timers/timers-howto.rst for the thresholds */
	if (usecs > 20 * USEC_PER_MSEC)
		schedule_timeout_idle(usecs_to_jiffies(usecs));
	else
		usleep_idle_range(usecs, usecs + 1);
}

/*
 * Returns zero once at least one scheme gets activated, or a negative error
 * code when the kdamond should stop while waiting for an activation.
 */
static int kdamond_wait_activation(struct damon_ctx *ctx)
{
	struct damos *s;
	unsigned long wait_time;
	unsigned long min_wait_time = 0;
	bool init_wait_time = false;

	while (!kdamond_need_stop(ctx)) {
		damon_for_each_scheme(s, ctx) {
			wait_time = damos_wmark_wait_us(s);
			if (!init_wait_time || wait_time < min_wait_time) {
				init_wait_time = true;
				min_wait_time = wait_time;
			}
		}
		if (!min_wait_time)
			return 0;

		kdamond_usleep(min_wait_time);
	}
	return -EBUSY;
}
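/*
 * Watermark behavior, worked through with illustrative numbers: for
 * DAMOS_WMARK_FREE_MEM_RATE the metric is the free memory rate in
 * per-thousand of total memory (see damos_wmark_metric_value() above).
 * With high/mid/low watermarks of 500/400/200, an inactive scheme gets
 * activated only once the metric drops below mid (below 40% free memory)
 * while staying at or above low, and gets deactivated again when the metric
 * rises above high (over 50% free) or falls below low (under 20% free).
 * While a scheme stays inactive, damos_wmark_wait_us() returns
 * wmarks.interval (microseconds) as the time to sleep before the next check.
 */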
/*
 * The monitoring daemon that runs as a kernel thread
 */
static int kdamond_fn(void *data)
{
	struct damon_ctx *ctx = data;
	struct damon_target *t;
	struct damon_region *r, *next;
	unsigned int max_nr_accesses = 0;
	unsigned long sz_limit = 0;
	bool done = false;

	pr_debug("kdamond (%d) starts\n", current->pid);

	if (ctx->ops.init)
		ctx->ops.init(ctx);
	if (ctx->callback.before_start && ctx->callback.before_start(ctx))
		done = true;

	sz_limit = damon_region_sz_limit(ctx);

	while (!kdamond_need_stop(ctx) && !done) {
		if (kdamond_wait_activation(ctx))
			continue;

		if (ctx->ops.prepare_access_checks)
			ctx->ops.prepare_access_checks(ctx);
		if (ctx->callback.after_sampling &&
				ctx->callback.after_sampling(ctx))
			done = true;

		kdamond_usleep(ctx->sample_interval);

		if (ctx->ops.check_accesses)
			max_nr_accesses = ctx->ops.check_accesses(ctx);

		if (kdamond_aggregate_interval_passed(ctx)) {
			kdamond_merge_regions(ctx,
					max_nr_accesses / 10,
					sz_limit);
			if (ctx->callback.after_aggregation &&
					ctx->callback.after_aggregation(ctx))
				done = true;
			kdamond_apply_schemes(ctx);
			kdamond_reset_aggregated(ctx);
			kdamond_split_regions(ctx);
			if (ctx->ops.reset_aggregated)
				ctx->ops.reset_aggregated(ctx);
		}

		if (kdamond_need_update_operations(ctx)) {
			if (ctx->ops.update)
				ctx->ops.update(ctx);
			sz_limit = damon_region_sz_limit(ctx);
		}
	}
	damon_for_each_target(t, ctx) {
		damon_for_each_region_safe(r, next, t)
			damon_destroy_region(r, t);
	}

	if (ctx->callback.before_terminate)
		ctx->callback.before_terminate(ctx);
	if (ctx->ops.cleanup)
		ctx->ops.cleanup(ctx);

	pr_debug("kdamond (%d) finishes\n", current->pid);
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond = NULL;
	mutex_unlock(&ctx->kdamond_lock);

	mutex_lock(&damon_lock);
	nr_running_ctxs--;
	if (!nr_running_ctxs && running_exclusive_ctxs)
		running_exclusive_ctxs = false;
	mutex_unlock(&damon_lock);

	return 0;
}

#include "core-test.h"