/* SPDX-License-Identifier: GPL-2.0 */
/*
 * DAMON api
 *
 * Author: SeongJae Park <sjpark@amazon.de>
 */

#ifndef _DAMON_H_
#define _DAMON_H_

#include <linux/mutex.h>
#include <linux/time64.h>
#include <linux/types.h>

/* Minimal region size.  Every damon_region is aligned by this. */
#define DAMON_MIN_REGION	PAGE_SIZE
/* Max priority score for DAMON-based operation schemes */
#define DAMOS_MAX_SCORE		(99)

/**
 * struct damon_addr_range - Represents an address region of [@start, @end).
 * @start: Start address of the region (inclusive).
 * @end: End address of the region (exclusive).
 */
struct damon_addr_range {
	unsigned long start;
	unsigned long end;
};

/**
 * struct damon_region - Represents a monitoring target region.
 * @ar: The address range of the region.
 * @sampling_addr: Address of the sample for the next access check.
 * @nr_accesses: Access frequency of this region.
 * @list: List head for siblings.
 * @age: Age of this region.
 *
 * @age is initially zero, increased for each aggregation interval, and reset
 * to zero again if the access frequency is significantly changed.  If two
 * regions are merged into a new region, both @nr_accesses and @age of the new
 * region are set as region size-weighted average of those of the two regions.
 */
struct damon_region {
	struct damon_addr_range ar;
	unsigned long sampling_addr;
	unsigned int nr_accesses;
	struct list_head list;

	unsigned int age;
/* private: Internal value for age calculation. */
	unsigned int last_nr_accesses;
};
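
/*
 * Illustrative sketch of the size-weighted averaging described above (an
 * assumption about the computation, not the in-tree merge code):
 *
 *	new_nr_accesses = (sz_a * nr_accesses_a + sz_b * nr_accesses_b) /
 *			(sz_a + sz_b);
 *
 * For example, merging a 4 KiB region with nr_accesses of 10 and a 12 KiB
 * region with nr_accesses of 2 results in a 16 KiB region with nr_accesses
 * of (4 * 10 + 12 * 2) / 16 = 4.  The age is averaged in the same way.
 */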

/**
 * struct damon_target - Represents a monitoring target.
 * @id: Unique identifier for this target.
 * @nr_regions: Number of monitoring target regions of this target.
 * @regions_list: Head of the monitoring target regions of this target.
 * @list: List head for siblings.
 *
 * Each monitoring context could have multiple targets.  For example, a
 * context for virtual memory address spaces could have multiple target
 * processes.  The @id of each target should be unique among the targets of
 * the context.  For example, in the virtual address monitoring context, it
 * could be a pidfd or an address of an mm_struct.
 */
struct damon_target {
	unsigned long id;
	unsigned int nr_regions;
	struct list_head regions_list;
	struct list_head list;
};

/**
 * enum damos_action - Represents an action of a Data Access Monitoring-based
 * Operation Scheme.
 *
 * @DAMOS_WILLNEED: Call ``madvise()`` for the region with MADV_WILLNEED.
 * @DAMOS_COLD: Call ``madvise()`` for the region with MADV_COLD.
 * @DAMOS_PAGEOUT: Call ``madvise()`` for the region with MADV_PAGEOUT.
 * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE.
 * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
 * @DAMOS_STAT: Do nothing but count the stat.
 */
enum damos_action {
	DAMOS_WILLNEED,
	DAMOS_COLD,
	DAMOS_PAGEOUT,
	DAMOS_HUGEPAGE,
	DAMOS_NOHUGEPAGE,
	DAMOS_STAT,		/* Do nothing but only record the stat */
};

/**
 * struct damos_quota - Controls the aggressiveness of the given scheme.
 * @ms: Maximum milliseconds that the scheme can use.
 * @sz: Maximum bytes of memory that the action can be applied to.
 * @reset_interval: Charge reset interval in milliseconds.
 *
 * @weight_sz: Weight of the region's size for prioritization.
 * @weight_nr_accesses: Weight of the region's nr_accesses for prioritization.
 * @weight_age: Weight of the region's age for prioritization.
 *
 * To avoid consuming too much CPU time or IO resources for applying the
 * &struct damos->action to large memory, DAMON allows users to set time
 * and/or size quotas.  The quotas can be set by writing non-zero values to
 * &ms and &sz, respectively.  If the time quota is set, DAMON tries to use
 * only up to &ms milliseconds within &reset_interval for applying the action.
 * If the size quota is set, DAMON tries to apply the action only up to &sz
 * bytes within &reset_interval.
 *
 * Internally, the time quota is transformed to a size quota using the
 * estimated throughput of the scheme's action.  DAMON then compares it
 * against &sz and uses the smaller one as the effective quota.
 *
 * For selecting regions within the quota, DAMON prioritizes the current
 * scheme's target memory regions using the
 * &struct damon_primitive->get_scheme_score.  You can customize the
 * prioritization logic by setting &weight_sz, &weight_nr_accesses, and
 * &weight_age, since monitoring primitives are encouraged to respect those
 * weights.
 */
struct damos_quota {
	unsigned long ms;
	unsigned long sz;
	unsigned long reset_interval;

	unsigned int weight_sz;
	unsigned int weight_nr_accesses;
	unsigned int weight_age;

/* private: */
	/* For throughput estimation */
	unsigned long total_charged_sz;
	unsigned long total_charged_ns;

	unsigned long esz;	/* Effective size quota in bytes */

	/* For charging the quota */
	unsigned long charged_sz;
	unsigned long charged_from;
	struct damon_target *charge_target_from;
	unsigned long charge_addr_from;

	/* For prioritization */
	unsigned long histogram[DAMOS_MAX_SCORE + 1];
	unsigned int min_score;
};
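
/*
 * Illustrative quota setup (a sketch; all values are arbitrary assumptions,
 * not defaults): allow the scheme's action to use at most 10 ms of CPU time
 * and be applied to at most 128 MiB of memory per 1000 ms reset_interval,
 * preferring frequently accessed regions over large or old ones:
 *
 *	struct damos_quota quota = {
 *		.ms = 10,
 *		.sz = 128 * 1024 * 1024,
 *		.reset_interval = 1000,
 *		.weight_sz = 1,
 *		.weight_nr_accesses = 4,
 *		.weight_age = 1,
 *	};
 */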

/**
 * enum damos_wmark_metric - Represents the watermark metric.
 *
 * @DAMOS_WMARK_NONE: Ignore the watermarks of the given scheme.
 * @DAMOS_WMARK_FREE_MEM_RATE: Free memory rate of the system in [0,1000]
 *	(per thousand).
 */
enum damos_wmark_metric {
	DAMOS_WMARK_NONE,
	DAMOS_WMARK_FREE_MEM_RATE,
};

/**
 * struct damos_watermarks - Controls when a given scheme should be activated.
 * @metric: Metric for the watermarks.
 * @interval: Watermarks check time interval in microseconds.
 * @high: High watermark.
 * @mid: Middle watermark.
 * @low: Low watermark.
 *
 * If &metric is &DAMOS_WMARK_NONE, the scheme is always active.  Being active
 * means DAMON does monitoring and applying the action of the scheme to
 * appropriate memory regions.  Else, DAMON checks &metric of the system at
 * least once per &interval microseconds and works as below.
 *
 * If &metric is higher than &high, the scheme is deactivated.  If &metric is
 * between &mid and &low, the scheme is activated.  If &metric is lower than
 * &low, the scheme is deactivated.
 */
struct damos_watermarks {
	enum damos_wmark_metric metric;
	unsigned long interval;
	unsigned long high;
	unsigned long mid;
	unsigned long low;

/* private: */
	bool activated;
};
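
/*
 * Illustrative watermarks setup (a sketch; the threshold values are arbitrary
 * assumptions): check the system's free memory rate every 5 seconds, activate
 * the scheme once the rate falls under 40% (400 per thousand), and deactivate
 * it while the rate is above 50% or below 5%:
 *
 *	struct damos_watermarks wmarks = {
 *		.metric = DAMOS_WMARK_FREE_MEM_RATE,
 *		.interval = 5000000,
 *		.high = 500,
 *		.mid = 400,
 *		.low = 50,
 *	};
 */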

/**
 * struct damos - Represents a Data Access Monitoring-based Operation Scheme.
 * @min_sz_region: Minimum size of target regions.
 * @max_sz_region: Maximum size of target regions.
 * @min_nr_accesses: Minimum ``->nr_accesses`` of target regions.
 * @max_nr_accesses: Maximum ``->nr_accesses`` of target regions.
 * @min_age_region: Minimum age of target regions.
 * @max_age_region: Maximum age of target regions.
 * @action: &damos_action to be applied to the target regions.
 * @quota: Controls the aggressiveness of this scheme.
 * @wmarks: Watermarks for automated (in)activation of this scheme.
 * @stat_count: Total number of regions that this scheme has been applied to.
 * @stat_sz: Total size of regions that this scheme has been applied to.
 * @list: List head for siblings.
 *
 * For each aggregation interval, DAMON finds regions which fit in the
 * condition (&min_sz_region, &max_sz_region, &min_nr_accesses,
 * &max_nr_accesses, &min_age_region, &max_age_region) and applies &action to
 * those.  To avoid consuming too much CPU time or IO resources for the
 * &action, &quota is used.
 *
 * To do the work only when needed, schemes can be activated for specific
 * system situations using &wmarks.  If all schemes that are registered to a
 * &struct damon_ctx are inactive, DAMON stops monitoring as well, and just
 * repeatedly checks the watermarks.
 *
 * After applying the &action to each region, &stat_count and &stat_sz are
 * updated to reflect the number and the total size of the regions that the
 * &action has been applied to.
 */
struct damos {
	unsigned long min_sz_region;
	unsigned long max_sz_region;
	unsigned int min_nr_accesses;
	unsigned int max_nr_accesses;
	unsigned int min_age_region;
	unsigned int max_age_region;
	enum damos_action action;
	struct damos_quota quota;
	struct damos_watermarks wmarks;
	unsigned long stat_count;
	unsigned long stat_sz;
	struct list_head list;
};
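
/*
 * Illustrative sketch of building such a scheme with damon_new_scheme(),
 * declared below (the condition values are arbitrary assumptions, and quota,
 * wmarks, and ctx are assumed to have been prepared as in the sketches
 * above): page out regions of any size that showed no access for at least
 * ten aggregation intervals.
 *
 *	struct damos *s = damon_new_scheme(DAMON_MIN_REGION, ULONG_MAX,
 *			0, 0, 10, UINT_MAX, DAMOS_PAGEOUT, &quota, &wmarks);
 *
 *	if (!s)
 *		return -ENOMEM;
 *	damon_add_scheme(ctx, s);
 */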

struct damon_ctx;

/**
 * struct damon_primitive - Monitoring primitives for given use cases.
 *
 * @init: Initialize primitive-internal data structures.
 * @update: Update primitive-internal data structures.
 * @prepare_access_checks: Prepare next access check of target regions.
 * @check_accesses: Check the accesses to target regions.
 * @reset_aggregated: Reset aggregated accesses monitoring results.
 * @get_scheme_score: Get the score of a region for a scheme.
 * @apply_scheme: Apply a DAMON-based operation scheme.
 * @target_valid: Determine if the target is valid.
 * @cleanup: Clean up the context.
 *
 * DAMON can be extended for various address spaces and usages.  For this,
 * users should register the low level primitives for their target address
 * space and usecase via the &damon_ctx.primitive.  Then, the monitoring
 * thread (&damon_ctx.kdamond) calls @init and @prepare_access_checks before
 * starting the monitoring, @update after each
 * &damon_ctx.primitive_update_interval, and @check_accesses, @target_valid
 * and @prepare_access_checks after each &damon_ctx.sample_interval.  Finally,
 * @reset_aggregated is called after each &damon_ctx.aggr_interval.
 *
 * @init should initialize primitive-internal data structures.  For example,
 * this could be used to construct proper monitoring target regions and link
 * those to &damon_ctx.adaptive_targets.
 * @update should update the primitive-internal data structures.  For example,
 * this could be used to update monitoring target regions for the current
 * status.
 * @prepare_access_checks should manipulate the monitoring regions to be
 * prepared for the next access check.
 * @check_accesses should check the accesses to each region that were made
 * since the last preparation and update the number of observed accesses of
 * each region.  It should also return the maximum number of observed
 * accesses found by the update.  The value will be used as the threshold for
 * the regions adjustment.
 * @reset_aggregated should reset the access monitoring results aggregated by
 * @check_accesses.
 * @get_scheme_score should return the priority score of a region for a scheme
 * as an integer in [0, &DAMOS_MAX_SCORE].
 * @apply_scheme is called from @kdamond when a region for a user-provided
 * DAMON-based operation scheme is found.  It should apply the scheme's action
 * to the region.  This is not used for the &DAMON_ARBITRARY_TARGET case.
 * @target_valid should check whether the target is still valid for the
 * monitoring.
 * @cleanup is called from @kdamond just before its termination.
 */
struct damon_primitive {
	void (*init)(struct damon_ctx *context);
	void (*update)(struct damon_ctx *context);
	void (*prepare_access_checks)(struct damon_ctx *context);
	unsigned int (*check_accesses)(struct damon_ctx *context);
	void (*reset_aggregated)(struct damon_ctx *context);
	int (*get_scheme_score)(struct damon_ctx *context,
			struct damon_target *t, struct damon_region *r,
			struct damos *scheme);
	int (*apply_scheme)(struct damon_ctx *context, struct damon_target *t,
			struct damon_region *r, struct damos *scheme);
	bool (*target_valid)(void *target);
	void (*cleanup)(struct damon_ctx *context);
};
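
/*
 * Illustrative sketch of registering primitives for a custom address space
 * (the my_* callbacks are hypothetical, not in-tree symbols; in-tree users
 * would typically just call damon_va_set_primitives() or
 * damon_pa_set_primitives(), declared at the end of this header):
 *
 *	ctx->primitive = (struct damon_primitive) {
 *		.init = my_init,
 *		.update = my_update,
 *		.prepare_access_checks = my_prepare_access_checks,
 *		.check_accesses = my_check_accesses,
 *		.reset_aggregated = my_reset_aggregated,
 *		.get_scheme_score = my_scheme_score,
 *		.apply_scheme = my_apply_scheme,
 *		.target_valid = my_target_valid,
 *		.cleanup = my_cleanup,
 *	};
 */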

/**
 * struct damon_callback - Monitoring events notification callbacks.
 *
 * @before_start: Called before starting the monitoring.
 * @after_sampling: Called after each sampling.
 * @after_aggregation: Called after each aggregation.
 * @before_terminate: Called before terminating the monitoring.
 * @private: User private data.
 *
 * The monitoring thread (&damon_ctx.kdamond) calls @before_start and
 * @before_terminate just before starting and finishing the monitoring,
 * respectively.  Therefore, those are good places for installing and cleaning
 * @private.
 *
 * The monitoring thread calls @after_sampling and @after_aggregation after
 * each sampling interval and aggregation interval, respectively.  Because
 * these are called from the monitoring thread itself, users can safely access
 * the monitoring results from them without additional protection.  For this
 * reason, users are recommended to use these callbacks for accessing the
 * results.
 *
 * If any callback returns non-zero, monitoring stops.
 */
struct damon_callback {
	void *private;

	int (*before_start)(struct damon_ctx *context);
	int (*after_sampling)(struct damon_ctx *context);
	int (*after_aggregation)(struct damon_ctx *context);
	void (*before_terminate)(struct damon_ctx *context);
};

/**
 * struct damon_ctx - Represents a context for each monitoring.  This is the
 * main interface that allows users to set the attributes and get the results
 * of the monitoring.
 *
 * @sample_interval: The time between access samplings.
 * @aggr_interval: The time between monitor results aggregations.
 * @primitive_update_interval: The time between monitoring primitive updates.
 *
 * For each @sample_interval, DAMON checks whether each region is accessed or
 * not.  It aggregates and keeps the access information (number of accesses to
 * each region) for @aggr_interval time.  DAMON also checks whether the target
 * memory regions need update (e.g., by ``mmap()`` calls from the application,
 * in case of virtual memory monitoring) and applies the changes for each
 * @primitive_update_interval.  All time intervals are in microseconds.
 * Please refer to &struct damon_primitive and &struct damon_callback for more
 * detail.
 *
 * @kdamond: Kernel thread that does the monitoring.
 * @kdamond_lock: Mutex for the synchronizations with @kdamond.
 *
 * For each monitoring context, one kernel thread for the monitoring is
 * created.  The pointer to the thread is stored in @kdamond.
 *
 * Once started, the monitoring thread runs until explicitly required to be
 * terminated or every monitoring target is invalid.  The validity of the
 * targets is checked via the &damon_primitive.target_valid of @primitive.
 * The termination can also be explicitly requested by calling damon_stop().
 * The thread sets @kdamond to NULL when it terminates.  Therefore, users can
 * know whether the monitoring is ongoing or terminated by reading @kdamond.
 * Reads and writes to @kdamond from outside of the monitoring thread must be
 * protected by @kdamond_lock.
 *
 * Note that the monitoring thread protects only @kdamond via @kdamond_lock.
 * Accesses to other fields must be protected by the callers themselves.
 *
 * @primitive: Set of monitoring primitives for given use cases.
 * @callback: Set of callbacks for monitoring events notifications.
 *
 * @min_nr_regions: The minimum number of adaptive monitoring regions.
 * @max_nr_regions: The maximum number of adaptive monitoring regions.
 * @adaptive_targets: Head of monitoring targets (&damon_target) list.
 * @schemes: Head of schemes (&damos) list.
 */
struct damon_ctx {
	unsigned long sample_interval;
	unsigned long aggr_interval;
	unsigned long primitive_update_interval;

/* private: internal use only */
	struct timespec64 last_aggregation;
	struct timespec64 last_primitive_update;

/* public: */
	struct task_struct *kdamond;
	struct mutex kdamond_lock;

	struct damon_primitive primitive;
	struct damon_callback callback;

	unsigned long min_nr_regions;
	unsigned long max_nr_regions;
	struct list_head adaptive_targets;
	struct list_head schemes;
};

#define damon_next_region(r) \
	(container_of(r->list.next, struct damon_region, list))

#define damon_prev_region(r) \
	(container_of(r->list.prev, struct damon_region, list))

#define damon_last_region(t) \
	(list_last_entry(&t->regions_list, struct damon_region, list))

#define damon_for_each_region(r, t) \
	list_for_each_entry(r, &t->regions_list, list)

#define damon_for_each_region_safe(r, next, t) \
	list_for_each_entry_safe(r, next, &t->regions_list, list)

#define damon_for_each_target(t, ctx) \
	list_for_each_entry(t, &(ctx)->adaptive_targets, list)

#define damon_for_each_target_safe(t, next, ctx) \
	list_for_each_entry_safe(t, next, &(ctx)->adaptive_targets, list)

#define damon_for_each_scheme(s, ctx) \
	list_for_each_entry(s, &(ctx)->schemes, list)

#define damon_for_each_scheme_safe(s, next, ctx) \
	list_for_each_entry_safe(s, next, &(ctx)->schemes, list)

#ifdef CONFIG_DAMON

struct damon_region *damon_new_region(unsigned long start, unsigned long end);
void damon_insert_region(struct damon_region *r,
		struct damon_region *prev, struct damon_region *next,
		struct damon_target *t);
void damon_add_region(struct damon_region *r, struct damon_target *t);
void damon_destroy_region(struct damon_region *r, struct damon_target *t);

struct damos *damon_new_scheme(
		unsigned long min_sz_region, unsigned long max_sz_region,
		unsigned int min_nr_accesses, unsigned int max_nr_accesses,
		unsigned int min_age_region, unsigned int max_age_region,
		enum damos_action action, struct damos_quota *quota,
		struct damos_watermarks *wmarks);
void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
void damon_destroy_scheme(struct damos *s);

struct damon_target *damon_new_target(unsigned long id);
void damon_add_target(struct damon_ctx *ctx, struct damon_target *t);
bool damon_targets_empty(struct damon_ctx *ctx);
void damon_free_target(struct damon_target *t);
void damon_destroy_target(struct damon_target *t);
unsigned int damon_nr_regions(struct damon_target *t);

struct damon_ctx *damon_new_ctx(void);
void damon_destroy_ctx(struct damon_ctx *ctx);
int damon_set_targets(struct damon_ctx *ctx,
		unsigned long *ids, ssize_t nr_ids);
int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
		unsigned long aggr_int, unsigned long primitive_upd_int,
		unsigned long min_nr_reg, unsigned long max_nr_reg);
int damon_set_schemes(struct damon_ctx *ctx,
		struct damos **schemes, ssize_t nr_schemes);
int damon_nr_running_ctxs(void);

int damon_start(struct damon_ctx **ctxs, int nr_ctxs);
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
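
/*
 * Illustrative end-to-end usage sketch (the intervals, region limits, and the
 * callback body are arbitrary assumptions; error handling is omitted, and the
 * target id value is left out because its encoding is defined by the
 * primitives in use, as described for struct damon_target above):
 *
 *	static int my_after_aggregation(struct damon_ctx *c)
 *	{
 *		struct damon_target *t;
 *		struct damon_region *r;
 *
 *		damon_for_each_target(t, c)
 *			damon_for_each_region(r, t)
 *				pr_debug("%lu-%lu: %u\n", r->ar.start,
 *						r->ar.end, r->nr_accesses);
 *		return 0;
 *	}
 *
 *	struct damon_ctx *ctx = damon_new_ctx();
 *	unsigned long id = ...;
 *
 *	damon_va_set_primitives(ctx);
 *	damon_set_attrs(ctx, 5000, 100000, 1000000, 10, 1000);
 *	damon_set_targets(ctx, &id, 1);
 *	ctx->callback.after_aggregation = my_after_aggregation;
 *	damon_start(&ctx, 1);
 *	...
 *	damon_stop(&ctx, 1);
 *	damon_destroy_ctx(ctx);
 */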

#endif	/* CONFIG_DAMON */

#ifdef CONFIG_DAMON_VADDR

/* Monitoring primitives for virtual memory address spaces */
void damon_va_init(struct damon_ctx *ctx);
void damon_va_update(struct damon_ctx *ctx);
void damon_va_prepare_access_checks(struct damon_ctx *ctx);
unsigned int damon_va_check_accesses(struct damon_ctx *ctx);
bool damon_va_target_valid(void *t);
void damon_va_cleanup(struct damon_ctx *ctx);
int damon_va_apply_scheme(struct damon_ctx *context, struct damon_target *t,
		struct damon_region *r, struct damos *scheme);
int damon_va_scheme_score(struct damon_ctx *context, struct damon_target *t,
		struct damon_region *r, struct damos *scheme);
void damon_va_set_primitives(struct damon_ctx *ctx);

#endif	/* CONFIG_DAMON_VADDR */

#ifdef CONFIG_DAMON_PADDR

/* Monitoring primitives for the physical memory address space */
void damon_pa_prepare_access_checks(struct damon_ctx *ctx);
unsigned int damon_pa_check_accesses(struct damon_ctx *ctx);
bool damon_pa_target_valid(void *t);
int damon_pa_apply_scheme(struct damon_ctx *context, struct damon_target *t,
		struct damon_region *r, struct damos *scheme);
int damon_pa_scheme_score(struct damon_ctx *context, struct damon_target *t,
		struct damon_region *r, struct damos *scheme);
void damon_pa_set_primitives(struct damon_ctx *ctx);

#endif	/* CONFIG_DAMON_PADDR */

#endif	/* _DAMON_H_ */