// SPDX-License-Identifier: GPL-2.0
/*
 * Data Access Monitor
 *
 * Author: SeongJae Park <sjpark@amazon.de>
 */

#define pr_fmt(fmt) "damon: " fmt

#include <linux/damon.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/string.h>

#define CREATE_TRACE_POINTS
#include <trace/events/damon.h>

#ifdef CONFIG_DAMON_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif

/* Get a random number in [l, r) */
#define damon_rand(l, r) (l + prandom_u32_max(r - l))

static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;

/*
 * Construct a damon_region struct
 *
 * Returns the pointer to the new struct on success, or NULL otherwise
 */
struct damon_region *damon_new_region(unsigned long start, unsigned long end)
{
	struct damon_region *region;

	region = kmalloc(sizeof(*region), GFP_KERNEL);
	if (!region)
		return NULL;

	region->ar.start = start;
	region->ar.end = end;
	region->nr_accesses = 0;
	INIT_LIST_HEAD(&region->list);

	region->age = 0;
	region->last_nr_accesses = 0;

	return region;
}

/*
 * Add a region between two other regions
 */
inline void damon_insert_region(struct damon_region *r,
		struct damon_region *prev, struct damon_region *next,
		struct damon_target *t)
{
	__list_add(&r->list, &prev->list, &next->list);
	t->nr_regions++;
}

void damon_add_region(struct damon_region *r, struct damon_target *t)
{
	list_add_tail(&r->list, &t->regions_list);
	t->nr_regions++;
}

static void damon_del_region(struct damon_region *r, struct damon_target *t)
{
	list_del(&r->list);
	t->nr_regions--;
}

static void damon_free_region(struct damon_region *r)
{
	kfree(r);
}

void damon_destroy_region(struct damon_region *r, struct damon_target *t)
{
	damon_del_region(r, t);
	damon_free_region(r);
}

struct damos *damon_new_scheme(
		unsigned long min_sz_region, unsigned long max_sz_region,
		unsigned int min_nr_accesses, unsigned int max_nr_accesses,
		unsigned int min_age_region, unsigned int max_age_region,
		enum damos_action action, struct damos_quota *quota,
		struct damos_watermarks *wmarks)
{
	struct damos *scheme;

	scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
	if (!scheme)
		return NULL;
	scheme->min_sz_region = min_sz_region;
	scheme->max_sz_region = max_sz_region;
	scheme->min_nr_accesses = min_nr_accesses;
	scheme->max_nr_accesses = max_nr_accesses;
	scheme->min_age_region = min_age_region;
	scheme->max_age_region = max_age_region;
	scheme->action = action;
	scheme->stat_count = 0;
	scheme->stat_sz = 0;
	INIT_LIST_HEAD(&scheme->list);

	scheme->quota.ms = quota->ms;
	scheme->quota.sz = quota->sz;
	scheme->quota.reset_interval = quota->reset_interval;
	scheme->quota.weight_sz = quota->weight_sz;
	scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
	scheme->quota.weight_age = quota->weight_age;
	scheme->quota.total_charged_sz = 0;
	scheme->quota.total_charged_ns = 0;
	scheme->quota.esz = 0;
	scheme->quota.charged_sz = 0;
	scheme->quota.charged_from = 0;
	scheme->quota.charge_target_from = NULL;
	scheme->quota.charge_addr_from = 0;

	scheme->wmarks.metric = wmarks->metric;
	scheme->wmarks.interval = wmarks->interval;
	scheme->wmarks.high = wmarks->high;
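	/* the watermark thresholds are consumed by damos_wmark_wait_us() */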
	scheme->wmarks.mid = wmarks->mid;
	scheme->wmarks.low = wmarks->low;
	scheme->wmarks.activated = true;

	return scheme;
}

void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
{
	list_add_tail(&s->list, &ctx->schemes);
}

static void damon_del_scheme(struct damos *s)
{
	list_del(&s->list);
}

static void damon_free_scheme(struct damos *s)
{
	kfree(s);
}

void damon_destroy_scheme(struct damos *s)
{
	damon_del_scheme(s);
	damon_free_scheme(s);
}

/*
 * Construct a damon_target struct
 *
 * Returns the pointer to the new struct on success, or NULL otherwise
 */
struct damon_target *damon_new_target(unsigned long id)
{
	struct damon_target *t;

	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return NULL;

	t->id = id;
	t->nr_regions = 0;
	INIT_LIST_HEAD(&t->regions_list);

	return t;
}

void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
{
	list_add_tail(&t->list, &ctx->adaptive_targets);
}

bool damon_targets_empty(struct damon_ctx *ctx)
{
	return list_empty(&ctx->adaptive_targets);
}

static void damon_del_target(struct damon_target *t)
{
	list_del(&t->list);
}

void damon_free_target(struct damon_target *t)
{
	struct damon_region *r, *next;

	damon_for_each_region_safe(r, next, t)
		damon_free_region(r);
	kfree(t);
}

void damon_destroy_target(struct damon_target *t)
{
	damon_del_target(t);
	damon_free_target(t);
}

unsigned int damon_nr_regions(struct damon_target *t)
{
	return t->nr_regions;
}

struct damon_ctx *damon_new_ctx(void)
{
	struct damon_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	ctx->sample_interval = 5 * 1000;
	ctx->aggr_interval = 100 * 1000;
	ctx->primitive_update_interval = 60 * 1000 * 1000;

	ktime_get_coarse_ts64(&ctx->last_aggregation);
	ctx->last_primitive_update = ctx->last_aggregation;

	mutex_init(&ctx->kdamond_lock);

	ctx->min_nr_regions = 10;
	ctx->max_nr_regions = 1000;

	INIT_LIST_HEAD(&ctx->adaptive_targets);
	INIT_LIST_HEAD(&ctx->schemes);

	return ctx;
}

static void damon_destroy_targets(struct damon_ctx *ctx)
{
	struct damon_target *t, *next_t;

	if (ctx->primitive.cleanup) {
		ctx->primitive.cleanup(ctx);
		return;
	}

	damon_for_each_target_safe(t, next_t, ctx)
		damon_destroy_target(t);
}

void damon_destroy_ctx(struct damon_ctx *ctx)
{
	struct damos *s, *next_s;

	damon_destroy_targets(ctx);

	damon_for_each_scheme_safe(s, next_s, ctx)
		damon_destroy_scheme(s);

	kfree(ctx);
}

/**
 * damon_set_targets() - Set monitoring targets.
 * @ctx:	monitoring context
 * @ids:	array of target ids
 * @nr_ids:	number of entries in @ids
 *
 * This function should not be called while the kdamond is running.
 *
 * Return: 0 on success, negative error code otherwise.
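 *
 * A minimal usage sketch (illustrative only; the id value below is a
 * hypothetical target process id, not something defined in this file):
 *
 *	unsigned long ids[] = {1234};
 *	int err = damon_set_targets(ctx, ids, ARRAY_SIZE(ids));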
 */
int damon_set_targets(struct damon_ctx *ctx,
		      unsigned long *ids, ssize_t nr_ids)
{
	ssize_t i;
	struct damon_target *t, *next;

	damon_destroy_targets(ctx);

	for (i = 0; i < nr_ids; i++) {
		t = damon_new_target(ids[i]);
		if (!t) {
			/* The caller should do cleanup of the ids itself */
			damon_for_each_target_safe(t, next, ctx)
				damon_destroy_target(t);
			return -ENOMEM;
		}
		damon_add_target(ctx, t);
	}

	return 0;
}

/**
 * damon_set_attrs() - Set attributes for the monitoring.
 * @ctx:		monitoring context
 * @sample_int:		time interval between samplings
 * @aggr_int:		time interval between aggregations
 * @primitive_upd_int:	time interval between monitoring primitive updates
 * @min_nr_reg:		minimal number of regions
 * @max_nr_reg:		maximum number of regions
 *
 * This function should not be called while the kdamond is running.
 * Every time interval is in microseconds.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
		    unsigned long aggr_int, unsigned long primitive_upd_int,
		    unsigned long min_nr_reg, unsigned long max_nr_reg)
{
	if (min_nr_reg < 3)
		return -EINVAL;
	if (min_nr_reg > max_nr_reg)
		return -EINVAL;

	ctx->sample_interval = sample_int;
	ctx->aggr_interval = aggr_int;
	ctx->primitive_update_interval = primitive_upd_int;
	ctx->min_nr_regions = min_nr_reg;
	ctx->max_nr_regions = max_nr_reg;

	return 0;
}

/**
 * damon_set_schemes() - Set data access monitoring based operation schemes.
 * @ctx:	monitoring context
 * @schemes:	array of the schemes
 * @nr_schemes:	number of entries in @schemes
 *
 * This function should not be called while the kdamond of the context is
 * running.
 *
 * Return: 0 on success, or negative error code otherwise.
 */
int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
			ssize_t nr_schemes)
{
	struct damos *s, *next;
	ssize_t i;

	damon_for_each_scheme_safe(s, next, ctx)
		damon_destroy_scheme(s);
	for (i = 0; i < nr_schemes; i++)
		damon_add_scheme(ctx, schemes[i]);
	return 0;
}

/**
 * damon_nr_running_ctxs() - Return number of currently running contexts.
 */
int damon_nr_running_ctxs(void)
{
	int nr_ctxs;

	mutex_lock(&damon_lock);
	nr_ctxs = nr_running_ctxs;
	mutex_unlock(&damon_lock);

	return nr_ctxs;
}

/* Returns the size upper limit for each monitoring region */
static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct damon_region *r;
	unsigned long sz = 0;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t)
			sz += r->ar.end - r->ar.start;
	}

	if (ctx->min_nr_regions)
		sz /= ctx->min_nr_regions;
	if (sz < DAMON_MIN_REGION)
		sz = DAMON_MIN_REGION;

	return sz;
}

static int kdamond_fn(void *data);

/*
 * __damon_start() - Starts monitoring with given context.
 * @ctx:	monitoring context
 *
 * This function should be called while damon_lock is held.
 *
 * Return: 0 on success, negative error code otherwise.
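 *
 * Callers are expected to go through damon_start() below, which takes
 * damon_lock and invokes this function once per context.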
 */
static int __damon_start(struct damon_ctx *ctx)
{
	int err = -EBUSY;

	mutex_lock(&ctx->kdamond_lock);
	if (!ctx->kdamond) {
		err = 0;
		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
				nr_running_ctxs);
		if (IS_ERR(ctx->kdamond)) {
			err = PTR_ERR(ctx->kdamond);
			ctx->kdamond = NULL;
		}
	}
	mutex_unlock(&ctx->kdamond_lock);

	return err;
}

/**
 * damon_start() - Starts monitoring for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to start monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * This function starts a group of monitoring threads for a group of monitoring
 * contexts.  One thread per context is created and run in parallel.  The
 * caller should handle synchronization between the threads by itself.  If a
 * group of threads created by another 'damon_start()' call is currently
 * running, this function does nothing but returns -EBUSY.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i;
	int err = 0;

	mutex_lock(&damon_lock);
	if (nr_running_ctxs) {
		mutex_unlock(&damon_lock);
		return -EBUSY;
	}

	for (i = 0; i < nr_ctxs; i++) {
		err = __damon_start(ctxs[i]);
		if (err)
			break;
		nr_running_ctxs++;
	}
	mutex_unlock(&damon_lock);

	return err;
}

/*
 * __damon_stop() - Stops monitoring of given context.
 * @ctx:	monitoring context
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_stop(struct damon_ctx *ctx)
{
	struct task_struct *tsk;

	mutex_lock(&ctx->kdamond_lock);
	tsk = ctx->kdamond;
	if (tsk) {
		get_task_struct(tsk);
		mutex_unlock(&ctx->kdamond_lock);
		kthread_stop(tsk);
		put_task_struct(tsk);
		return 0;
	}
	mutex_unlock(&ctx->kdamond_lock);

	return -EPERM;
}

/**
 * damon_stop() - Stops monitoring for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to stop monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i, err = 0;

	for (i = 0; i < nr_ctxs; i++) {
		/* nr_running_ctxs is decremented in kdamond_fn */
		err = __damon_stop(ctxs[i]);
		if (err)
			return err;
	}

	return err;
}

/*
 * damon_check_reset_time_interval() - Check if a time interval has elapsed.
 * @baseline:	the time to check whether the interval has elapsed since
 * @interval:	the time interval (microseconds)
 *
 * See whether the given time interval has passed since the given baseline
 * time.  If so, it also updates the baseline to the current time for the next
 * check.
 *
 * Return: true if the time interval has passed, or false otherwise.
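 *
 * Note that @interval is given in microseconds while the comparison below is
 * done in nanoseconds, hence the 'interval * 1000' scaling.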
 */
static bool damon_check_reset_time_interval(struct timespec64 *baseline,
		unsigned long interval)
{
	struct timespec64 now;

	ktime_get_coarse_ts64(&now);
	if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
			interval * 1000)
		return false;
	*baseline = now;
	return true;
}

/*
 * Check whether it is time to flush the aggregated information
 */
static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_aggregation,
			ctx->aggr_interval);
}

/*
 * Reset the aggregated monitoring results ('nr_accesses' of each region).
 */
static void kdamond_reset_aggregated(struct damon_ctx *c)
{
	struct damon_target *t;

	damon_for_each_target(t, c) {
		struct damon_region *r;

		damon_for_each_region(r, t) {
			trace_damon_aggregated(t, r, damon_nr_regions(t));
			r->last_nr_accesses = r->nr_accesses;
			r->nr_accesses = 0;
		}
	}
}

static void damon_split_region_at(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		unsigned long sz_r);

static bool __damos_valid_target(struct damon_region *r, struct damos *s)
{
	unsigned long sz;

	sz = r->ar.end - r->ar.start;
	return s->min_sz_region <= sz && sz <= s->max_sz_region &&
		s->min_nr_accesses <= r->nr_accesses &&
		r->nr_accesses <= s->max_nr_accesses &&
		s->min_age_region <= r->age && r->age <= s->max_age_region;
}

static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
		struct damon_region *r, struct damos *s)
{
	bool ret = __damos_valid_target(r, s);

	if (!ret || !s->quota.esz || !c->primitive.get_scheme_score)
		return ret;

	return c->primitive.get_scheme_score(c, t, r, s) >= s->quota.min_score;
}

static void damon_do_apply_schemes(struct damon_ctx *c,
				   struct damon_target *t,
				   struct damon_region *r)
{
	struct damos *s;

	damon_for_each_scheme(s, c) {
		struct damos_quota *quota = &s->quota;
		unsigned long sz = r->ar.end - r->ar.start;
		struct timespec64 begin, end;

		if (!s->wmarks.activated)
			continue;

		/* Check the quota */
		if (quota->esz && quota->charged_sz >= quota->esz)
			continue;

		/* Skip previously charged regions */
		if (quota->charge_target_from) {
			if (t != quota->charge_target_from)
				continue;
			if (r == damon_last_region(t)) {
				quota->charge_target_from = NULL;
				quota->charge_addr_from = 0;
				continue;
			}
			if (quota->charge_addr_from &&
					r->ar.end <= quota->charge_addr_from)
				continue;

			if (quota->charge_addr_from && r->ar.start <
					quota->charge_addr_from) {
				sz = ALIGN_DOWN(quota->charge_addr_from -
						r->ar.start, DAMON_MIN_REGION);
				if (!sz) {
					if (r->ar.end - r->ar.start <=
							DAMON_MIN_REGION)
						continue;
					sz = DAMON_MIN_REGION;
				}
				damon_split_region_at(c, t, r, sz);
				r = damon_next_region(r);
				sz = r->ar.end - r->ar.start;
			}
			quota->charge_target_from = NULL;
			quota->charge_addr_from = 0;
		}

		if (!damos_valid_target(c, t, r, s))
			continue;

		/* Apply the scheme */
		if (c->primitive.apply_scheme) {
			if (quota->esz &&
					quota->charged_sz + sz > quota->esz) {
				sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
						DAMON_MIN_REGION);
				if (!sz)
					goto update_stat;
				damon_split_region_at(c, t, r, sz);
			}
			ktime_get_coarse_ts64(&begin);
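			/*
			 * Time the primitive call so that
			 * damos_set_effective_quota() can later estimate the
			 * applying throughput from 'total_charged_ns'.
			 */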
			c->primitive.apply_scheme(c, t, r, s);
			ktime_get_coarse_ts64(&end);
			quota->total_charged_ns += timespec64_to_ns(&end) -
				timespec64_to_ns(&begin);
			quota->charged_sz += sz;
			if (quota->esz && quota->charged_sz >= quota->esz) {
				quota->charge_target_from = t;
				quota->charge_addr_from = r->ar.end + 1;
			}
		}
		if (s->action != DAMOS_STAT)
			r->age = 0;

update_stat:
		s->stat_count++;
		s->stat_sz += sz;
	}
}

/* Shouldn't be called if quota->ms and quota->sz are zero */
static void damos_set_effective_quota(struct damos_quota *quota)
{
	unsigned long throughput;
	unsigned long esz;

	if (!quota->ms) {
		quota->esz = quota->sz;
		return;
	}

	if (quota->total_charged_ns)
		throughput = quota->total_charged_sz * 1000000 /
			quota->total_charged_ns;
	else
		throughput = PAGE_SIZE * 1024;
	esz = throughput * quota->ms;

	if (quota->sz && quota->sz < esz)
		esz = quota->sz;
	quota->esz = esz;
}

static void kdamond_apply_schemes(struct damon_ctx *c)
{
	struct damon_target *t;
	struct damon_region *r, *next_r;
	struct damos *s;

	damon_for_each_scheme(s, c) {
		struct damos_quota *quota = &s->quota;
		unsigned long cumulated_sz;
		unsigned int score, max_score = 0;

		if (!s->wmarks.activated)
			continue;

		if (!quota->ms && !quota->sz)
			continue;

		/* New charge window starts */
		if (time_after_eq(jiffies, quota->charged_from +
					msecs_to_jiffies(
						quota->reset_interval))) {
			quota->total_charged_sz += quota->charged_sz;
			quota->charged_from = jiffies;
			quota->charged_sz = 0;
			damos_set_effective_quota(quota);
		}

		if (!c->primitive.get_scheme_score)
			continue;

		/* Fill up the score histogram */
		memset(quota->histogram, 0, sizeof(quota->histogram));
		damon_for_each_target(t, c) {
			damon_for_each_region(r, t) {
				if (!__damos_valid_target(r, s))
					continue;
				score = c->primitive.get_scheme_score(
						c, t, r, s);
				quota->histogram[score] +=
					r->ar.end - r->ar.start;
				if (score > max_score)
					max_score = score;
			}
		}

		/* Set the min score limit */
		for (cumulated_sz = 0, score = max_score; ; score--) {
			cumulated_sz += quota->histogram[score];
			if (cumulated_sz >= quota->esz || !score)
				break;
		}
		quota->min_score = score;
	}

	damon_for_each_target(t, c) {
		damon_for_each_region_safe(r, next_r, t)
			damon_do_apply_schemes(c, t, r);
	}
}

#define sz_damon_region(r) (r->ar.end - r->ar.start)

/*
 * Merge two adjacent regions into one region
 */
static void damon_merge_two_regions(struct damon_target *t,
		struct damon_region *l, struct damon_region *r)
{
	unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);

	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
			(sz_l + sz_r);
	l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
	l->ar.end = r->ar.end;
	damon_destroy_region(r, t);
}

#define diff_of(a, b) (a > b ? a - b : b - a)
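/*
 * For illustration (example values, not taken from this file): with a merge
 * 'thres' of 2, adjacent regions having nr_accesses of 5 and 6 (diff 1 <= 2)
 * can be merged by damon_merge_regions_of() below, while regions with 5 and 8
 * (diff 3 > 2) are kept separate.
 */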

/*
 * Merge adjacent regions having similar access frequencies
 *
 * t		target affected by this merge operation
 * thres	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 */
static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
				   unsigned long sz_limit)
{
	struct damon_region *r, *prev = NULL, *next;

	damon_for_each_region_safe(r, next, t) {
		if (diff_of(r->nr_accesses, r->last_nr_accesses) > thres)
			r->age = 0;
		else
			r->age++;

		if (prev && prev->ar.end == r->ar.start &&
		    diff_of(prev->nr_accesses, r->nr_accesses) <= thres &&
		    sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
			damon_merge_two_regions(t, prev, r);
		else
			prev = r;
	}
}

/*
 * Merge adjacent regions having similar access frequencies
 *
 * threshold	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 *
 * This function merges monitoring target regions which are adjacent and whose
 * access frequencies are similar.  This is for minimizing the monitoring
 * overhead under the dynamically changeable access pattern.  If a merge was
 * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 */
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
				  unsigned long sz_limit)
{
	struct damon_target *t;

	damon_for_each_target(t, c)
		damon_merge_regions_of(t, threshold, sz_limit);
}

/*
 * Split a region in two
 *
 * r		the region to be split
 * sz_r		size of the first sub-region that will be made
 */
static void damon_split_region_at(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		unsigned long sz_r)
{
	struct damon_region *new;

	new = damon_new_region(r->ar.start + sz_r, r->ar.end);
	if (!new)
		return;

	r->ar.end = new->ar.start;

	new->age = r->age;
	new->last_nr_accesses = r->last_nr_accesses;

	damon_insert_region(new, r, damon_next_region(r), t);
}

/* Split every region in the given target into 'nr_subs' regions */
static void damon_split_regions_of(struct damon_ctx *ctx,
				   struct damon_target *t, int nr_subs)
{
	struct damon_region *r, *next;
	unsigned long sz_region, sz_sub = 0;
	int i;

	damon_for_each_region_safe(r, next, t) {
		sz_region = r->ar.end - r->ar.start;

		for (i = 0; i < nr_subs - 1 &&
				sz_region > 2 * DAMON_MIN_REGION; i++) {
			/*
			 * Randomly select the size of the left sub-region to
			 * be at least 10% and at most 90% of the original
			 * region
			 */
			sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
					sz_region / 10, DAMON_MIN_REGION);
			/* Do not allow blank region */
			if (sz_sub == 0 || sz_sub >= sz_region)
				continue;

			damon_split_region_at(ctx, t, r, sz_sub);
			sz_region = sz_sub;
		}
	}
}

/*
 * Split every target region into randomly-sized small regions
 *
 * This function splits every target region into random-sized small regions if
 * the current total number of regions is equal to or smaller than half of the
 * user-specified maximum number of regions.  This is for maximizing the
 * monitoring accuracy under the dynamically changeable access patterns.  If a
 * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 * it.
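 *
 * Each region is normally split in two; when the total number of regions has
 * not changed since the last split and is still below a third of the maximum,
 * three-way splits are used so the middle of each region is also examined.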
 */
static void kdamond_split_regions(struct damon_ctx *ctx)
{
	struct damon_target *t;
	unsigned int nr_regions = 0;
	static unsigned int last_nr_regions;
	int nr_subregions = 2;

	damon_for_each_target(t, ctx)
		nr_regions += damon_nr_regions(t);

	if (nr_regions > ctx->max_nr_regions / 2)
		return;

	/* Maybe the middle of the region has different access frequency */
	if (last_nr_regions == nr_regions &&
			nr_regions < ctx->max_nr_regions / 3)
		nr_subregions = 3;

	damon_for_each_target(t, ctx)
		damon_split_regions_of(ctx, t, nr_subregions);

	last_nr_regions = nr_regions;
}

/*
 * Check whether it is time to check and apply the target monitoring regions
 *
 * Returns true if it is.
 */
static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_primitive_update,
			ctx->primitive_update_interval);
}

/*
 * Check whether the current monitoring should be stopped
 *
 * The monitoring is stopped when either the user requested to stop, or all
 * monitoring targets are invalid.
 *
 * Returns true if the current monitoring needs to be stopped.
 */
static bool kdamond_need_stop(struct damon_ctx *ctx)
{
	struct damon_target *t;

	if (kthread_should_stop())
		return true;

	if (!ctx->primitive.target_valid)
		return false;

	damon_for_each_target(t, ctx) {
		if (ctx->primitive.target_valid(t))
			return false;
	}

	return true;
}

static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
{
	struct sysinfo i;

	switch (metric) {
	case DAMOS_WMARK_FREE_MEM_RATE:
		si_meminfo(&i);
		return i.freeram * 1000 / i.totalram;
	default:
		break;
	}
	return -EINVAL;
}

/*
 * Returns zero if the scheme is active.  Else, returns the time to wait for
 * the next watermark check, in microseconds.
 */
static unsigned long damos_wmark_wait_us(struct damos *scheme)
{
	unsigned long metric;

	if (scheme->wmarks.metric == DAMOS_WMARK_NONE)
		return 0;

	metric = damos_wmark_metric_value(scheme->wmarks.metric);
	/* higher than high watermark or lower than low watermark */
	if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
		if (scheme->wmarks.activated)
			pr_debug("deactivate a scheme (%d) for %s wmark\n",
					scheme->action,
					metric > scheme->wmarks.high ?
					"high" : "low");
		scheme->wmarks.activated = false;
		return scheme->wmarks.interval;
	}

	/* inactive and higher than middle watermark */
	if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
			!scheme->wmarks.activated)
		return scheme->wmarks.interval;

	if (!scheme->wmarks.activated)
		pr_debug("activate a scheme (%d)\n", scheme->action);
	scheme->wmarks.activated = true;
	return 0;
}

static void kdamond_usleep(unsigned long usecs)
{
	/* See Documentation/timers/timers-howto.rst for the thresholds */
	if (usecs > 20 * USEC_PER_MSEC)
		schedule_timeout_idle(usecs_to_jiffies(usecs));
	else
		usleep_idle_range(usecs, usecs + 1);
}

/*
 * Returns zero when at least one scheme is active, or a negative error code
 * when the kdamond should stop instead of waiting for activation.
 */
static int kdamond_wait_activation(struct damon_ctx *ctx)
{
	struct damos *s;
	unsigned long wait_time;
	unsigned long min_wait_time = 0;

	while (!kdamond_need_stop(ctx)) {
		damon_for_each_scheme(s, ctx) {
			wait_time = damos_wmark_wait_us(s);
			if (!min_wait_time || wait_time < min_wait_time)
				min_wait_time = wait_time;
		}
		if (!min_wait_time)
			return 0;

		kdamond_usleep(min_wait_time);
	}
	return -EBUSY;
}

/*
 * The monitoring daemon that runs as a kernel thread
 */
static int kdamond_fn(void *data)
{
	struct damon_ctx *ctx = (struct damon_ctx *)data;
	struct damon_target *t;
	struct damon_region *r, *next;
	unsigned int max_nr_accesses = 0;
	unsigned long sz_limit = 0;
	bool done = false;

	pr_debug("kdamond (%d) starts\n", current->pid);

	if (ctx->primitive.init)
		ctx->primitive.init(ctx);
	if (ctx->callback.before_start && ctx->callback.before_start(ctx))
		done = true;

	sz_limit = damon_region_sz_limit(ctx);

	while (!kdamond_need_stop(ctx) && !done) {
		if (kdamond_wait_activation(ctx))
			continue;

		if (ctx->primitive.prepare_access_checks)
			ctx->primitive.prepare_access_checks(ctx);
		if (ctx->callback.after_sampling &&
				ctx->callback.after_sampling(ctx))
			done = true;

		kdamond_usleep(ctx->sample_interval);

		if (ctx->primitive.check_accesses)
			max_nr_accesses = ctx->primitive.check_accesses(ctx);

		if (kdamond_aggregate_interval_passed(ctx)) {
			kdamond_merge_regions(ctx,
					max_nr_accesses / 10,
					sz_limit);
			if (ctx->callback.after_aggregation &&
					ctx->callback.after_aggregation(ctx))
				done = true;
			kdamond_apply_schemes(ctx);
			kdamond_reset_aggregated(ctx);
			kdamond_split_regions(ctx);
			if (ctx->primitive.reset_aggregated)
				ctx->primitive.reset_aggregated(ctx);
		}

		if (kdamond_need_update_primitive(ctx)) {
			if (ctx->primitive.update)
				ctx->primitive.update(ctx);
			sz_limit = damon_region_sz_limit(ctx);
		}
	}
	damon_for_each_target(t, ctx) {
		damon_for_each_region_safe(r, next, t)
			damon_destroy_region(r, t);
	}

	if (ctx->callback.before_terminate)
		ctx->callback.before_terminate(ctx);
	if (ctx->primitive.cleanup)
		ctx->primitive.cleanup(ctx);

	pr_debug("kdamond (%d) finishes\n", current->pid);
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond = NULL;
	mutex_unlock(&ctx->kdamond_lock);

	mutex_lock(&damon_lock);
	nr_running_ctxs--;
	mutex_unlock(&damon_lock);

	return 0;
}

#include "core-test.h"